Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/Kconfig | 3
-rw-r--r-- fs/9p/acl.c | 37
-rw-r--r-- fs/9p/acl.h | 20
-rw-r--r-- fs/9p/fid.c | 17
-rw-r--r-- fs/9p/v9fs.c | 34
-rw-r--r-- fs/9p/v9fs.h | 10
-rw-r--r-- fs/9p/vfs_dentry.c | 1
-rw-r--r-- fs/9p/vfs_dir.c | 92
-rw-r--r-- fs/9p/vfs_file.c | 15
-rw-r--r-- fs/9p/vfs_inode.c | 9
-rw-r--r-- fs/9p/vfs_inode_dotl.c | 96
-rw-r--r-- fs/9p/vfs_super.c | 2
-rw-r--r-- fs/9p/xattr.c | 33
-rw-r--r-- fs/9p/xattr.h | 2
-rw-r--r-- fs/Kconfig | 10
-rw-r--r-- fs/adfs/Kconfig | 4
-rw-r--r-- fs/adfs/dir.c | 2
-rw-r--r-- fs/affs/Kconfig | 4
-rw-r--r-- fs/affs/amigaffs.c | 3
-rw-r--r-- fs/affs/dir.c | 2
-rw-r--r-- fs/afs/Kconfig | 7
-rw-r--r-- fs/afs/afs.h | 11
-rw-r--r-- fs/afs/dir.c | 4
-rw-r--r-- fs/afs/flock.c | 4
-rw-r--r-- fs/afs/fsclient.c | 14
-rw-r--r-- fs/afs/inode.c | 6
-rw-r--r-- fs/afs/super.c | 6
-rw-r--r-- fs/afs/write.c | 7
-rw-r--r-- fs/aio.c | 10
-rw-r--r-- fs/anon_inodes.c | 10
-rw-r--r-- fs/autofs4/autofs_i.h | 2
-rw-r--r-- fs/autofs4/dev-ioctl.c | 2
-rw-r--r-- fs/autofs4/root.c | 6
-rw-r--r-- fs/befs/Kconfig | 4
-rw-r--r-- fs/befs/linuxvfs.c | 2
-rw-r--r-- fs/bfs/Kconfig | 4
-rw-r--r-- fs/bfs/dir.c | 2
-rw-r--r-- fs/binfmt_aout.c | 4
-rw-r--r-- fs/binfmt_elf.c | 16
-rw-r--r-- fs/binfmt_elf_fdpic.c | 11
-rw-r--r-- fs/binfmt_flat.c | 2
-rw-r--r-- fs/binfmt_misc.c | 4
-rw-r--r-- fs/bio.c | 2
-rw-r--r-- fs/block_dev.c | 9
-rw-r--r-- fs/btrfs/Kconfig | 3
-rw-r--r-- fs/btrfs/export.c | 4
-rw-r--r-- fs/btrfs/extent-tree.c | 54
-rw-r--r-- fs/btrfs/extent_map.c | 14
-rw-r--r-- fs/btrfs/extent_map.h | 1
-rw-r--r-- fs/btrfs/file-item.c | 4
-rw-r--r-- fs/btrfs/file.c | 43
-rw-r--r-- fs/btrfs/free-space-cache.c | 20
-rw-r--r-- fs/btrfs/inode.c | 141
-rw-r--r-- fs/btrfs/ioctl.c | 186
-rw-r--r-- fs/btrfs/ordered-data.c | 13
-rw-r--r-- fs/btrfs/qgroup.c | 20
-rw-r--r-- fs/btrfs/relocation.c | 4
-rw-r--r-- fs/btrfs/scrub.c | 25
-rw-r--r-- fs/btrfs/send.c | 6
-rw-r--r-- fs/btrfs/super.c | 2
-rw-r--r-- fs/btrfs/transaction.c | 47
-rw-r--r-- fs/btrfs/tree-log.c | 10
-rw-r--r-- fs/btrfs/volumes.c | 26
-rw-r--r-- fs/buffer.c | 21
-rw-r--r-- fs/ceph/Kconfig | 4
-rw-r--r-- fs/ceph/addr.c | 50
-rw-r--r-- fs/ceph/caps.c | 49
-rw-r--r-- fs/ceph/dir.c | 6
-rw-r--r-- fs/ceph/export.c | 4
-rw-r--r-- fs/ceph/file.c | 18
-rw-r--r-- fs/ceph/inode.c | 63
-rw-r--r-- fs/ceph/ioctl.c | 22
-rw-r--r-- fs/ceph/locks.c | 2
-rw-r--r-- fs/ceph/mds_client.c | 37
-rw-r--r-- fs/ceph/mds_client.h | 10
-rw-r--r-- fs/ceph/mdsmap.c | 12
-rw-r--r-- fs/ceph/strings.c | 4
-rw-r--r-- fs/ceph/super.c | 7
-rw-r--r-- fs/ceph/super.h | 14
-rw-r--r-- fs/ceph/xattr.c | 214
-rw-r--r-- fs/cifs/Kconfig | 8
-rw-r--r-- fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r-- fs/cifs/cifs_fs_sb.h | 8
-rw-r--r-- fs/cifs/cifs_spnego.c | 6
-rw-r--r-- fs/cifs/cifsacl.c | 47
-rw-r--r-- fs/cifs/cifsfs.c | 21
-rw-r--r-- fs/cifs/cifsglob.h | 24
-rw-r--r-- fs/cifs/cifspdu.h | 1
-rw-r--r-- fs/cifs/cifsproto.h | 9
-rw-r--r-- fs/cifs/cifssmb.c | 10
-rw-r--r-- fs/cifs/connect.c | 68
-rw-r--r-- fs/cifs/dir.c | 18
-rw-r--r-- fs/cifs/file.c | 175
-rw-r--r-- fs/cifs/inode.c | 61
-rw-r--r-- fs/cifs/ioctl.c | 2
-rw-r--r-- fs/cifs/link.c | 2
-rw-r--r-- fs/cifs/misc.c | 2
-rw-r--r-- fs/cifs/readdir.c | 12
-rw-r--r-- fs/cifs/smb1ops.c | 8
-rw-r--r-- fs/cifs/smb2ops.c | 2
-rw-r--r-- fs/cifs/transport.c | 6
-rw-r--r-- fs/coda/cache.c | 4
-rw-r--r-- fs/coda/coda_fs_i.h | 2
-rw-r--r-- fs/coda/coda_linux.c | 8
-rw-r--r-- fs/coda/dir.c | 2
-rw-r--r-- fs/coda/file.c | 12
-rw-r--r-- fs/coda/inode.c | 8
-rw-r--r-- fs/coda/pioctl.c | 2
-rw-r--r-- fs/coda/psdev.c | 7
-rw-r--r-- fs/coda/upcall.c | 10
-rw-r--r-- fs/compat.c | 52
-rw-r--r-- fs/compat_ioctl.c | 2
-rw-r--r-- fs/configfs/dir.c | 7
-rw-r--r-- fs/coredump.c | 6
-rw-r--r-- fs/cramfs/inode.c | 2
-rw-r--r-- fs/dcache.c | 89
-rw-r--r-- fs/debugfs/inode.c | 3
-rw-r--r-- fs/devpts/inode.c | 18
-rw-r--r-- fs/direct-io.c | 2
-rw-r--r-- fs/dlm/config.c | 2
-rw-r--r-- fs/dlm/dlm_internal.h | 3
-rw-r--r-- fs/dlm/lock.c | 33
-rw-r--r-- fs/dlm/lockspace.c | 1
-rw-r--r-- fs/dlm/lowcomms.c | 11
-rw-r--r-- fs/dlm/recover.c | 52
-rw-r--r-- fs/dlm/user.c | 8
-rw-r--r-- fs/ecryptfs/Kconfig | 4
-rw-r--r-- fs/ecryptfs/ecryptfs_kernel.h | 6
-rw-r--r-- fs/ecryptfs/file.c | 4
-rw-r--r-- fs/ecryptfs/inode.c | 3
-rw-r--r-- fs/ecryptfs/messaging.c | 6
-rw-r--r-- fs/ecryptfs/read_write.c | 6
-rw-r--r-- fs/efs/Kconfig | 4
-rw-r--r-- fs/efs/dir.c | 2
-rw-r--r-- fs/exec.c | 23
-rw-r--r-- fs/exofs/dir.c | 2
-rw-r--r-- fs/exportfs/expfs.c | 3
-rw-r--r-- fs/ext2/balloc.c | 28
-rw-r--r-- fs/ext2/dir.c | 2
-rw-r--r-- fs/ext2/inode.c | 12
-rw-r--r-- fs/ext2/ioctl.c | 2
-rw-r--r-- fs/ext2/super.c | 2
-rw-r--r-- fs/ext2/xattr.c | 4
-rw-r--r-- fs/ext3/dir.c | 8
-rw-r--r-- fs/ext3/inode.c | 16
-rw-r--r-- fs/ext3/ioctl.c | 2
-rw-r--r-- fs/ext3/namei.c | 5
-rw-r--r-- fs/ext3/resize.c | 12
-rw-r--r-- fs/ext3/super.c | 52
-rw-r--r-- fs/ext3/xattr.c | 4
-rw-r--r-- fs/ext4/Kconfig | 2
-rw-r--r-- fs/ext4/acl.c | 7
-rw-r--r-- fs/ext4/balloc.c | 13
-rw-r--r-- fs/ext4/dir.c | 9
-rw-r--r-- fs/ext4/ext4.h | 123
-rw-r--r-- fs/ext4/ext4_extents.h | 6
-rw-r--r-- fs/ext4/ext4_jbd2.c | 102
-rw-r--r-- fs/ext4/ext4_jbd2.h | 51
-rw-r--r-- fs/ext4/extents.c | 316
-rw-r--r-- fs/ext4/extents_status.c | 631
-rw-r--r-- fs/ext4/extents_status.h | 86
-rw-r--r-- fs/ext4/file.c | 18
-rw-r--r-- fs/ext4/hash.c | 6
-rw-r--r-- fs/ext4/ialloc.c | 29
-rw-r--r-- fs/ext4/indirect.c | 259
-rw-r--r-- fs/ext4/inline.c | 14
-rw-r--r-- fs/ext4/inode.c | 680
-rw-r--r-- fs/ext4/ioctl.c | 15
-rw-r--r-- fs/ext4/mballoc.c | 69
-rw-r--r-- fs/ext4/mballoc.h | 4
-rw-r--r-- fs/ext4/migrate.c | 15
-rw-r--r-- fs/ext4/mmp.c | 4
-rw-r--r-- fs/ext4/move_extent.c | 16
-rw-r--r-- fs/ext4/namei.c | 501
-rw-r--r-- fs/ext4/page-io.c | 85
-rw-r--r-- fs/ext4/resize.c | 36
-rw-r--r-- fs/ext4/super.c | 487
-rw-r--r-- fs/ext4/xattr.c | 23
-rw-r--r-- fs/ext4/xattr.h | 68
-rw-r--r-- fs/f2fs/acl.c | 13
-rw-r--r-- fs/f2fs/checkpoint.c | 66
-rw-r--r-- fs/f2fs/data.c | 18
-rw-r--r-- fs/f2fs/debug.c | 54
-rw-r--r-- fs/f2fs/dir.c | 47
-rw-r--r-- fs/f2fs/f2fs.h | 64
-rw-r--r-- fs/f2fs/file.c | 61
-rw-r--r-- fs/f2fs/gc.c | 186
-rw-r--r-- fs/f2fs/gc.h | 21
-rw-r--r-- fs/f2fs/hash.c | 18
-rw-r--r-- fs/f2fs/inode.c | 57
-rw-r--r-- fs/f2fs/namei.c | 34
-rw-r--r-- fs/f2fs/node.c | 76
-rw-r--r-- fs/f2fs/recovery.c | 32
-rw-r--r-- fs/f2fs/segment.c | 77
-rw-r--r-- fs/f2fs/segment.h | 16
-rw-r--r-- fs/f2fs/super.c | 192
-rw-r--r-- fs/f2fs/xattr.c | 7
-rw-r--r-- fs/fat/dir.c | 6
-rw-r--r-- fs/fat/fat.h | 2
-rw-r--r-- fs/fat/file.c | 4
-rw-r--r-- fs/fat/inode.c | 77
-rw-r--r-- fs/fat/nfs.c | 3
-rw-r--r-- fs/fcntl.c | 2
-rw-r--r-- fs/file.c | 4
-rw-r--r-- fs/file_table.c | 31
-rw-r--r-- fs/freevxfs/vxfs_lookup.c | 2
-rw-r--r-- fs/fs-writeback.c | 60
-rw-r--r-- fs/fscache/cookie.c | 11
-rw-r--r-- fs/fuse/Kconfig | 16
-rw-r--r-- fs/fuse/control.c | 2
-rw-r--r-- fs/fuse/cuse.c | 46
-rw-r--r-- fs/fuse/dev.c | 133
-rw-r--r-- fs/fuse/dir.c | 261
-rw-r--r-- fs/fuse/file.c | 243
-rw-r--r-- fs/fuse/fuse_i.h | 74
-rw-r--r-- fs/fuse/inode.c | 18
-rw-r--r-- fs/gfs2/acl.c | 2
-rw-r--r-- fs/gfs2/aops.c | 17
-rw-r--r-- fs/gfs2/bmap.c | 32
-rw-r--r-- fs/gfs2/dir.c | 32
-rw-r--r-- fs/gfs2/export.c | 4
-rw-r--r-- fs/gfs2/file.c | 23
-rw-r--r-- fs/gfs2/glock.c | 116
-rw-r--r-- fs/gfs2/glops.c | 4
-rw-r--r-- fs/gfs2/incore.h | 11
-rw-r--r-- fs/gfs2/inode.c | 40
-rw-r--r-- fs/gfs2/lock_dlm.c | 8
-rw-r--r-- fs/gfs2/log.c | 76
-rw-r--r-- fs/gfs2/log.h | 12
-rw-r--r-- fs/gfs2/lops.c | 83
-rw-r--r-- fs/gfs2/lops.h | 14
-rw-r--r-- fs/gfs2/meta_io.c | 35
-rw-r--r-- fs/gfs2/meta_io.h | 3
-rw-r--r-- fs/gfs2/ops_fstype.c | 4
-rw-r--r-- fs/gfs2/quota.c | 142
-rw-r--r-- fs/gfs2/quota.h | 15
-rw-r--r-- fs/gfs2/rgrp.c | 55
-rw-r--r-- fs/gfs2/super.c | 76
-rw-r--r-- fs/gfs2/super.h | 3
-rw-r--r-- fs/gfs2/sys.c | 80
-rw-r--r-- fs/gfs2/trans.c | 124
-rw-r--r-- fs/gfs2/trans.h | 3
-rw-r--r-- fs/gfs2/util.c | 3
-rw-r--r-- fs/gfs2/xattr.c | 40
-rw-r--r-- fs/hfs/Kconfig | 4
-rw-r--r-- fs/hfs/dir.c | 2
-rw-r--r-- fs/hfs/inode.c | 2
-rw-r--r-- fs/hfsplus/Makefile | 4
-rw-r--r-- fs/hfsplus/attributes.c | 399
-rw-r--r-- fs/hfsplus/bfind.c | 93
-rw-r--r-- fs/hfsplus/bnode.c | 8
-rw-r--r-- fs/hfsplus/brec.c | 23
-rw-r--r-- fs/hfsplus/btree.c | 8
-rw-r--r-- fs/hfsplus/catalog.c | 36
-rw-r--r-- fs/hfsplus/dir.c | 57
-rw-r--r-- fs/hfsplus/extents.c | 4
-rw-r--r-- fs/hfsplus/hfsplus_fs.h | 52
-rw-r--r-- fs/hfsplus/hfsplus_raw.h | 68
-rw-r--r-- fs/hfsplus/inode.c | 20
-rw-r--r-- fs/hfsplus/ioctl.c | 112
-rw-r--r-- fs/hfsplus/super.c | 56
-rw-r--r-- fs/hfsplus/unicode.c | 7
-rw-r--r-- fs/hfsplus/xattr.c | 709
-rw-r--r-- fs/hfsplus/xattr.h | 60
-rw-r--r-- fs/hfsplus/xattr_security.c | 104
-rw-r--r-- fs/hfsplus/xattr_trusted.c | 63
-rw-r--r-- fs/hfsplus/xattr_user.c | 63
-rw-r--r-- fs/hostfs/hostfs_kern.c | 10
-rw-r--r-- fs/hpfs/dir.c | 4
-rw-r--r-- fs/hpfs/file.c | 2
-rw-r--r-- fs/hpfs/inode.c | 2
-rw-r--r-- fs/hppfs/hppfs.c | 8
-rw-r--r-- fs/hugetlbfs/inode.c | 33
-rw-r--r-- fs/inode.c | 21
-rw-r--r-- fs/ioctl.c | 12
-rw-r--r-- fs/isofs/compress.c | 2
-rw-r--r-- fs/isofs/dir.c | 2
-rw-r--r-- fs/isofs/export.c | 4
-rw-r--r-- fs/jbd/journal.c | 3
-rw-r--r-- fs/jbd2/commit.c | 8
-rw-r--r-- fs/jbd2/journal.c | 66
-rw-r--r-- fs/jbd2/transaction.c | 29
-rw-r--r-- fs/jffs2/Kconfig | 10
-rw-r--r-- fs/jffs2/dir.c | 4
-rw-r--r-- fs/jfs/ioctl.c | 2
-rw-r--r-- fs/jfs/jfs_dtree.c | 2
-rw-r--r-- fs/jfs/super.c | 2
-rw-r--r-- fs/lockd/clntlock.c | 2
-rw-r--r-- fs/lockd/clntproc.c | 5
-rw-r--r-- fs/lockd/host.c | 29
-rw-r--r-- fs/lockd/svclock.c | 16
-rw-r--r-- fs/lockd/svcsubs.c | 9
-rw-r--r-- fs/locks.c | 24
-rw-r--r-- fs/logfs/Kconfig | 4
-rw-r--r-- fs/logfs/dir.c | 4
-rw-r--r-- fs/logfs/file.c | 2
-rw-r--r-- fs/minix/dir.c | 2
-rw-r--r-- fs/namei.c | 67
-rw-r--r-- fs/namespace.c | 63
-rw-r--r-- fs/ncpfs/dir.c | 10
-rw-r--r-- fs/ncpfs/inode.c | 59
-rw-r--r-- fs/ncpfs/ioctl.c | 29
-rw-r--r-- fs/ncpfs/mmap.c | 2
-rw-r--r-- fs/ncpfs/ncp_fs_sb.h | 6
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 1
-rw-r--r-- fs/nfs/callback_proc.c | 61
-rw-r--r-- fs/nfs/client.c | 1
-rw-r--r-- fs/nfs/delegation.c | 154
-rw-r--r-- fs/nfs/delegation.h | 1
-rw-r--r-- fs/nfs/dir.c | 64
-rw-r--r-- fs/nfs/file.c | 2
-rw-r--r-- fs/nfs/getroot.c | 3
-rw-r--r-- fs/nfs/idmap.c | 55
-rw-r--r-- fs/nfs/inode.c | 21
-rw-r--r-- fs/nfs/internal.h | 1
-rw-r--r-- fs/nfs/namespace.c | 20
-rw-r--r-- fs/nfs/nfs2xdr.c | 19
-rw-r--r-- fs/nfs/nfs3proc.c | 2
-rw-r--r-- fs/nfs/nfs3xdr.c | 18
-rw-r--r-- fs/nfs/nfs4_fs.h | 4
-rw-r--r-- fs/nfs/nfs4client.c | 75
-rw-r--r-- fs/nfs/nfs4file.c | 2
-rw-r--r-- fs/nfs/nfs4proc.c | 149
-rw-r--r-- fs/nfs/nfs4state.c | 33
-rw-r--r-- fs/nfs/nfs4super.c | 6
-rw-r--r-- fs/nfs/nfs4xdr.c | 16
-rw-r--r-- fs/nfs/objlayout/objio_osd.c | 1
-rw-r--r-- fs/nfs/pnfs.c | 152
-rw-r--r-- fs/nfs/pnfs.h | 7
-rw-r--r-- fs/nfs/pnfs_dev.c | 9
-rw-r--r-- fs/nfs/proc.c | 2
-rw-r--r-- fs/nfs/read.c | 10
-rw-r--r-- fs/nfs/super.c | 79
-rw-r--r-- fs/nfs/unlink.c | 5
-rw-r--r-- fs/nfs/write.c | 10
-rw-r--r-- fs/nfs_common/nfsacl.c | 41
-rw-r--r-- fs/nfsd/Kconfig | 4
-rw-r--r-- fs/nfsd/acl.h | 2
-rw-r--r-- fs/nfsd/auth.c | 12
-rw-r--r-- fs/nfsd/auth.h | 6
-rw-r--r-- fs/nfsd/export.c | 22
-rw-r--r-- fs/nfsd/fault_inject.c | 6
-rw-r--r-- fs/nfsd/idmap.h | 8
-rw-r--r-- fs/nfsd/nfs2acl.c | 23
-rw-r--r-- fs/nfsd/nfs3proc.c | 5
-rw-r--r-- fs/nfsd/nfs3xdr.c | 24
-rw-r--r-- fs/nfsd/nfs4acl.c | 63
-rw-r--r-- fs/nfsd/nfs4idmap.c | 38
-rw-r--r-- fs/nfsd/nfs4recover.c | 4
-rw-r--r-- fs/nfsd/nfs4state.c | 13
-rw-r--r-- fs/nfsd/nfs4xdr.c | 58
-rw-r--r-- fs/nfsd/nfscache.c | 3
-rw-r--r-- fs/nfsd/nfsctl.c | 2
-rw-r--r-- fs/nfsd/nfsd.h | 6
-rw-r--r-- fs/nfsd/nfsproc.c | 12
-rw-r--r-- fs/nfsd/nfssvc.c | 6
-rw-r--r-- fs/nfsd/nfsxdr.c | 21
-rw-r--r-- fs/nfsd/state.h | 4
-rw-r--r-- fs/nfsd/vfs.c | 14
-rw-r--r-- fs/nfsd/vfs.h | 8
-rw-r--r-- fs/nfsd/xdr.h | 2
-rw-r--r-- fs/nfsd/xdr3.h | 2
-rw-r--r-- fs/nilfs2/Kconfig | 3
-rw-r--r-- fs/nilfs2/dir.c | 2
-rw-r--r-- fs/nilfs2/file.c | 4
-rw-r--r-- fs/nilfs2/ioctl.c | 7
-rw-r--r-- fs/nilfs2/namei.c | 4
-rw-r--r-- fs/notify/dnotify/dnotify.c | 4
-rw-r--r-- fs/notify/fanotify/fanotify_user.c | 2
-rw-r--r-- fs/notify/fsnotify.c | 3
-rw-r--r-- fs/notify/inode_mark.c | 19
-rw-r--r-- fs/notify/inotify/inotify_fsnotify.c | 1
-rw-r--r-- fs/notify/inotify/inotify_user.c | 28
-rw-r--r-- fs/notify/vfsmount_mark.c | 19
-rw-r--r-- fs/ntfs/dir.c | 2
-rw-r--r-- fs/ocfs2/acl.c | 31
-rw-r--r-- fs/ocfs2/alloc.c | 3
-rw-r--r-- fs/ocfs2/aops.c | 7
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c | 6
-rw-r--r-- fs/ocfs2/cluster/tcp.c | 40
-rw-r--r-- fs/ocfs2/dcache.c | 3
-rw-r--r-- fs/ocfs2/dir.c | 5
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c | 4
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c | 2
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c | 6
-rw-r--r-- fs/ocfs2/dlmfs/dlmfs.c | 6
-rw-r--r-- fs/ocfs2/dlmglue.c | 13
-rw-r--r-- fs/ocfs2/export.c | 4
-rw-r--r-- fs/ocfs2/extent_map.c | 3
-rw-r--r-- fs/ocfs2/file.c | 25
-rw-r--r-- fs/ocfs2/inode.c | 12
-rw-r--r-- fs/ocfs2/ioctl.c | 4
-rw-r--r-- fs/ocfs2/journal.c | 10
-rw-r--r-- fs/ocfs2/localalloc.c | 8
-rw-r--r-- fs/ocfs2/mmap.c | 10
-rw-r--r-- fs/ocfs2/move_extents.c | 2
-rw-r--r-- fs/ocfs2/namei.c | 4
-rw-r--r-- fs/ocfs2/refcounttree.c | 6
-rw-r--r-- fs/ocfs2/stack_o2cb.c | 2
-rw-r--r-- fs/ocfs2/suballoc.c | 7
-rw-r--r-- fs/ocfs2/suballoc.h | 2
-rw-r--r-- fs/ocfs2/super.c | 6
-rw-r--r-- fs/ocfs2/symlink.c | 2
-rw-r--r-- fs/ocfs2/sysfile.c | 3
-rw-r--r-- fs/ocfs2/xattr.c | 2
-rw-r--r-- fs/omfs/dir.c | 4
-rw-r--r-- fs/open.c | 34
-rw-r--r-- fs/openpromfs/inode.c | 2
-rw-r--r-- fs/pipe.c | 20
-rw-r--r-- fs/proc/Makefile | 3
-rw-r--r-- fs/proc/array.c | 4
-rw-r--r-- fs/proc/base.c | 48
-rw-r--r-- fs/proc/generic.c | 58
-rw-r--r-- fs/proc/inode.c | 44
-rw-r--r-- fs/proc/internal.h | 3
-rw-r--r-- fs/proc/kcore.c | 3
-rw-r--r-- fs/proc/meminfo.c | 6
-rw-r--r-- fs/proc/nommu.c | 2
-rw-r--r-- fs/proc/proc_devtree.c | 13
-rw-r--r-- fs/proc/proc_net.c | 16
-rw-r--r-- fs/proc/proc_sysctl.c | 23
-rw-r--r-- fs/proc/task_mmu.c | 6
-rw-r--r-- fs/proc/task_nommu.c | 2
-rw-r--r-- fs/proc/vmcore.c | 35
-rw-r--r-- fs/pstore/inode.c | 18
-rw-r--r-- fs/pstore/platform.c | 35
-rw-r--r-- fs/pstore/ram.c | 24
-rw-r--r-- fs/pstore/ram_core.c | 9
-rw-r--r-- fs/qnx4/dir.c | 2
-rw-r--r-- fs/qnx6/dir.c | 2
-rw-r--r-- fs/qnx6/inode.c | 2
-rw-r--r-- fs/ramfs/file-nommu.c | 2
-rw-r--r-- fs/ramfs/inode.c | 1
-rw-r--r-- fs/read_write.c | 8
-rw-r--r-- fs/readdir.c | 2
-rw-r--r-- fs/reiserfs/file.c | 2
-rw-r--r-- fs/reiserfs/inode.c | 4
-rw-r--r-- fs/reiserfs/ioctl.c | 2
-rw-r--r-- fs/reiserfs/procfs.c | 2
-rw-r--r-- fs/romfs/super.c | 2
-rw-r--r-- fs/select.c | 1
-rw-r--r-- fs/seq_file.c | 42
-rw-r--r-- fs/splice.c | 11
-rw-r--r-- fs/squashfs/dir.c | 2
-rw-r--r-- fs/stat.c | 13
-rw-r--r-- fs/super.c | 8
-rw-r--r-- fs/sync.c | 2
-rw-r--r-- fs/sysfs/bin.c | 9
-rw-r--r-- fs/sysfs/group.c | 42
-rw-r--r-- fs/sysfs/mount.c | 2
-rw-r--r-- fs/sysfs/symlink.c | 45
-rw-r--r-- fs/sysfs/sysfs.h | 2
-rw-r--r-- fs/sysv/dir.c | 2
-rw-r--r-- fs/timerfd.c | 85
-rw-r--r-- fs/ubifs/debug.c | 8
-rw-r--r-- fs/ubifs/dir.c | 2
-rw-r--r-- fs/ubifs/file.c | 3
-rw-r--r-- fs/ubifs/ioctl.c | 2
-rw-r--r-- fs/ubifs/lpt_commit.c | 14
-rw-r--r-- fs/ubifs/orphan.c | 12
-rw-r--r-- fs/ubifs/tnc_commit.c | 2
-rw-r--r-- fs/ubifs/ubifs.h | 6
-rw-r--r-- fs/udf/dir.c | 2
-rw-r--r-- fs/udf/file.c | 6
-rw-r--r-- fs/udf/inode.c | 86
-rw-r--r-- fs/udf/namei.c | 4
-rw-r--r-- fs/udf/super.c | 14
-rw-r--r-- fs/udf/udf_i.h | 16
-rw-r--r-- fs/udf/udf_sb.h | 5
-rw-r--r-- fs/udf/udfdecl.h | 5
-rw-r--r-- fs/ufs/Kconfig | 2
-rw-r--r-- fs/ufs/dir.c | 2
-rw-r--r-- fs/xfs/Kconfig | 4
-rw-r--r-- fs/xfs/xfs_alloc.c | 2
-rw-r--r-- fs/xfs/xfs_aops.c | 2
-rw-r--r-- fs/xfs/xfs_attr.c | 9
-rw-r--r-- fs/xfs/xfs_bmap.c | 124
-rw-r--r-- fs/xfs/xfs_buf.c | 34
-rw-r--r-- fs/xfs/xfs_buf.h | 6
-rw-r--r-- fs/xfs/xfs_buf_item.c | 177
-rw-r--r-- fs/xfs/xfs_buf_item.h | 16
-rw-r--r-- fs/xfs/xfs_dfrag.c | 12
-rw-r--r-- fs/xfs/xfs_dir2_block.c | 6
-rw-r--r-- fs/xfs/xfs_dquot.c | 12
-rw-r--r-- fs/xfs/xfs_export.c | 4
-rw-r--r-- fs/xfs/xfs_file.c | 4
-rw-r--r-- fs/xfs/xfs_fsops.c | 4
-rw-r--r-- fs/xfs/xfs_ialloc.c | 4
-rw-r--r-- fs/xfs/xfs_inode.c | 6
-rw-r--r-- fs/xfs/xfs_inode.h | 1
-rw-r--r-- fs/xfs/xfs_inode_item.c | 16
-rw-r--r-- fs/xfs/xfs_inode_item.h | 4
-rw-r--r-- fs/xfs/xfs_ioctl.c | 6
-rw-r--r-- fs/xfs/xfs_ioctl32.c | 2
-rw-r--r-- fs/xfs/xfs_iomap.c | 86
-rw-r--r-- fs/xfs/xfs_log.c | 10
-rw-r--r-- fs/xfs/xfs_log_recover.c | 3
-rw-r--r-- fs/xfs/xfs_mount.c | 14
-rw-r--r-- fs/xfs/xfs_mount.h | 9
-rw-r--r-- fs/xfs/xfs_qm.c | 7
-rw-r--r-- fs/xfs/xfs_qm_bhv.c | 2
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c | 32
-rw-r--r-- fs/xfs/xfs_super.c | 29
-rw-r--r-- fs/xfs/xfs_trace.h | 1
-rw-r--r-- fs/xfs/xfs_trans.c | 376
-rw-r--r-- fs/xfs/xfs_trans.h | 18
-rw-r--r-- fs/xfs/xfs_trans_ail.c | 14
-rw-r--r-- fs/xfs/xfs_trans_buf.c | 27
-rw-r--r-- fs/xfs/xfs_trans_dquot.c | 10
-rw-r--r-- fs/xfs/xfs_trans_inode.c | 41
-rw-r--r-- fs/xfs/xfs_types.h | 1
-rw-r--r-- fs/xfs/xfs_vnodeops.c | 12
512 files changed, 9750 insertions, 6059 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 0a93dc1cb4ac..55abfd62654a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -11,8 +11,7 @@ config 9P_FS
11 11
12if 9P_FS 12if 9P_FS
13config 9P_FSCACHE 13config 9P_FSCACHE
14 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support"
15 depends on EXPERIMENTAL
16 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y 15 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
17 help 16 help
18 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 15b679166201..7af425f53bee 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -23,6 +23,7 @@
23#include "acl.h" 23#include "acl.h"
24#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h" 25#include "v9fs_vfs.h"
26#include "fid.h"
26 27
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 28static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 29{
@@ -113,16 +114,12 @@ struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type)
113 114
114} 115}
115 116
116static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) 117static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
117{ 118{
118 int retval; 119 int retval;
119 char *name; 120 char *name;
120 size_t size; 121 size_t size;
121 void *buffer; 122 void *buffer;
122 struct inode *inode = dentry->d_inode;
123
124 set_cached_acl(inode, type, acl);
125
126 if (!acl) 123 if (!acl)
127 return 0; 124 return 0;
128 125
@@ -144,17 +141,16 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
144 default: 141 default:
145 BUG(); 142 BUG();
146 } 143 }
147 retval = v9fs_xattr_set(dentry, name, buffer, size, 0); 144 retval = v9fs_fid_xattr_set(fid, name, buffer, size, 0);
148err_free_out: 145err_free_out:
149 kfree(buffer); 146 kfree(buffer);
150 return retval; 147 return retval;
151} 148}
152 149
153int v9fs_acl_chmod(struct dentry *dentry) 150int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid)
154{ 151{
155 int retval = 0; 152 int retval = 0;
156 struct posix_acl *acl; 153 struct posix_acl *acl;
157 struct inode *inode = dentry->d_inode;
158 154
159 if (S_ISLNK(inode->i_mode)) 155 if (S_ISLNK(inode->i_mode))
160 return -EOPNOTSUPP; 156 return -EOPNOTSUPP;
@@ -163,25 +159,30 @@ int v9fs_acl_chmod(struct dentry *dentry)
163 retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); 159 retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
164 if (retval) 160 if (retval)
165 return retval; 161 return retval;
166 retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, acl); 162 set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
163 retval = v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl);
167 posix_acl_release(acl); 164 posix_acl_release(acl);
168 } 165 }
169 return retval; 166 return retval;
170} 167}
171 168
172int v9fs_set_create_acl(struct dentry *dentry, 169int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid,
173 struct posix_acl **dpacl, struct posix_acl **pacl) 170 struct posix_acl *dacl, struct posix_acl *acl)
174{ 171{
175 if (dentry) { 172 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
176 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl); 173 set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
177 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl); 174 v9fs_set_acl(fid, ACL_TYPE_DEFAULT, dacl);
178 } 175 v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl);
179 posix_acl_release(*dpacl);
180 posix_acl_release(*pacl);
181 *dpacl = *pacl = NULL;
182 return 0; 176 return 0;
183} 177}
184 178
179void v9fs_put_acl(struct posix_acl *dacl,
180 struct posix_acl *acl)
181{
182 posix_acl_release(dacl);
183 posix_acl_release(acl);
184}
185
185int v9fs_acl_mode(struct inode *dir, umode_t *modep, 186int v9fs_acl_mode(struct inode *dir, umode_t *modep,
186 struct posix_acl **dpacl, struct posix_acl **pacl) 187 struct posix_acl **dpacl, struct posix_acl **pacl)
187{ 188{
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 559556411965..e4f7e882272b 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -17,27 +17,33 @@
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); 19extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct inode *, struct p9_fid *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct inode *, struct p9_fid *,
22 struct posix_acl **, struct posix_acl **); 22 struct posix_acl *, struct posix_acl *);
23extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, 23extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
24 struct posix_acl **dpacl, struct posix_acl **pacl); 24 struct posix_acl **dpacl, struct posix_acl **pacl);
25extern void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl);
25#else 26#else
26#define v9fs_iop_get_acl NULL 27#define v9fs_iop_get_acl NULL
27static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) 28static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
28{ 29{
29 return 0; 30 return 0;
30} 31}
31static inline int v9fs_acl_chmod(struct dentry *dentry) 32static inline int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid)
32{ 33{
33 return 0; 34 return 0;
34} 35}
35static inline int v9fs_set_create_acl(struct dentry *dentry, 36static inline int v9fs_set_create_acl(struct inode *inode,
36 struct posix_acl **dpacl, 37 struct p9_fid *fid,
37 struct posix_acl **pacl) 38 struct posix_acl *dacl,
39 struct posix_acl *acl)
38{ 40{
39 return 0; 41 return 0;
40} 42}
43static inline void v9fs_put_acl(struct posix_acl *dacl,
44 struct posix_acl *acl)
45{
46}
41static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep, 47static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
42 struct posix_acl **dpacl, 48 struct posix_acl **dpacl,
43 struct posix_acl **pacl) 49 struct posix_acl **pacl)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index da8eefbe830d..afd4724b2d92 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -74,19 +74,20 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
74 * 74 *
75 */ 75 */
76 76
77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) 77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
78{ 78{
79 struct v9fs_dentry *dent; 79 struct v9fs_dentry *dent;
80 struct p9_fid *fid, *ret; 80 struct p9_fid *fid, *ret;
81 81
82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
83 dentry->d_name.name, dentry, uid, any); 83 dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid),
84 any);
84 dent = (struct v9fs_dentry *) dentry->d_fsdata; 85 dent = (struct v9fs_dentry *) dentry->d_fsdata;
85 ret = NULL; 86 ret = NULL;
86 if (dent) { 87 if (dent) {
87 spin_lock(&dent->lock); 88 spin_lock(&dent->lock);
88 list_for_each_entry(fid, &dent->fidlist, dlist) { 89 list_for_each_entry(fid, &dent->fidlist, dlist) {
89 if (any || fid->uid == uid) { 90 if (any || uid_eq(fid->uid, uid)) {
90 ret = fid; 91 ret = fid;
91 break; 92 break;
92 } 93 }
@@ -126,7 +127,7 @@ err_out:
126} 127}
127 128
128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, 129static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 uid_t uid, int any) 130 kuid_t uid, int any)
130{ 131{
131 struct dentry *ds; 132 struct dentry *ds;
132 char **wnames, *uname; 133 char **wnames, *uname;
@@ -233,7 +234,7 @@ err_out:
233 234
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) 235struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{ 236{
236 uid_t uid; 237 kuid_t uid;
237 int any, access; 238 int any, access;
238 struct v9fs_session_info *v9ses; 239 struct v9fs_session_info *v9ses;
239 240
@@ -253,7 +254,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
253 break; 254 break;
254 255
255 default: 256 default:
256 uid = ~0; 257 uid = INVALID_UID;
257 any = 0; 258 any = 0;
258 break; 259 break;
259 } 260 }
@@ -272,7 +273,7 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
272 return ret; 273 return ret;
273} 274}
274 275
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) 276static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, kuid_t uid)
276{ 277{
277 struct p9_fid *fid, *ret; 278 struct p9_fid *fid, *ret;
278 279
@@ -289,7 +290,7 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
289 int err; 290 int err;
290 struct p9_fid *fid; 291 struct p9_fid *fid;
291 292
292 fid = v9fs_fid_clone_with_uid(dentry, 0); 293 fid = v9fs_fid_clone_with_uid(dentry, GLOBAL_ROOT_UID);
293 if (IS_ERR(fid)) 294 if (IS_ERR(fid))
294 goto error_out; 295 goto error_out;
295 /* 296 /*
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index d934f04e7736..58e6cbce4156 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -161,7 +161,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
161 ret = r; 161 ret = r;
162 continue; 162 continue;
163 } 163 }
164 v9ses->dfltuid = option; 164 v9ses->dfltuid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(v9ses->dfltuid)) {
166 p9_debug(P9_DEBUG_ERROR,
167 "uid field, but not a uid?\n");
168 ret = -EINVAL;
169 continue;
170 }
165 break; 171 break;
166 case Opt_dfltgid: 172 case Opt_dfltgid:
167 r = match_int(&args[0], &option); 173 r = match_int(&args[0], &option);
@@ -171,7 +177,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
171 ret = r; 177 ret = r;
172 continue; 178 continue;
173 } 179 }
174 v9ses->dfltgid = option; 180 v9ses->dfltgid = make_kgid(current_user_ns(), option);
181 if (!gid_valid(v9ses->dfltgid)) {
182 p9_debug(P9_DEBUG_ERROR,
183 "gid field, but not a gid?\n");
184 ret = -EINVAL;
185 continue;
186 }
175 break; 187 break;
176 case Opt_afid: 188 case Opt_afid:
177 r = match_int(&args[0], &option); 189 r = match_int(&args[0], &option);
@@ -248,8 +260,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
248 else if (strcmp(s, "client") == 0) { 260 else if (strcmp(s, "client") == 0) {
249 v9ses->flags |= V9FS_ACCESS_CLIENT; 261 v9ses->flags |= V9FS_ACCESS_CLIENT;
250 } else { 262 } else {
263 uid_t uid;
251 v9ses->flags |= V9FS_ACCESS_SINGLE; 264 v9ses->flags |= V9FS_ACCESS_SINGLE;
252 v9ses->uid = simple_strtoul(s, &e, 10); 265 uid = simple_strtoul(s, &e, 10);
253 if (*e != '\0') { 266 if (*e != '\0') {
254 ret = -EINVAL; 267 ret = -EINVAL;
255 pr_info("Unknown access argument %s\n", 268 pr_info("Unknown access argument %s\n",
@@ -257,6 +270,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
257 kfree(s); 270 kfree(s);
258 goto free_and_return; 271 goto free_and_return;
259 } 272 }
273 v9ses->uid = make_kuid(current_user_ns(), uid);
274 if (!uid_valid(v9ses->uid)) {
275 ret = -EINVAL;
276 pr_info("Uknown uid %s\n", s);
277 kfree(s);
278 goto free_and_return;
279 }
260 } 280 }
261 281
262 kfree(s); 282 kfree(s);
@@ -319,7 +339,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
319 list_add(&v9ses->slist, &v9fs_sessionlist); 339 list_add(&v9ses->slist, &v9fs_sessionlist);
320 spin_unlock(&v9fs_sessionlist_lock); 340 spin_unlock(&v9fs_sessionlist_lock);
321 341
322 v9ses->uid = ~0; 342 v9ses->uid = INVALID_UID;
323 v9ses->dfltuid = V9FS_DEFUID; 343 v9ses->dfltuid = V9FS_DEFUID;
324 v9ses->dfltgid = V9FS_DEFGID; 344 v9ses->dfltgid = V9FS_DEFGID;
325 345
@@ -364,7 +384,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
364 384
365 v9ses->flags &= ~V9FS_ACCESS_MASK; 385 v9ses->flags &= ~V9FS_ACCESS_MASK;
366 v9ses->flags |= V9FS_ACCESS_ANY; 386 v9ses->flags |= V9FS_ACCESS_ANY;
367 v9ses->uid = ~0; 387 v9ses->uid = INVALID_UID;
368 } 388 }
369 if (!v9fs_proto_dotl(v9ses) || 389 if (!v9fs_proto_dotl(v9ses) ||
370 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { 390 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
@@ -375,7 +395,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
375 v9ses->flags &= ~V9FS_ACL_MASK; 395 v9ses->flags &= ~V9FS_ACL_MASK;
376 } 396 }
377 397
378 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 398 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, INVALID_UID,
379 v9ses->aname); 399 v9ses->aname);
380 if (IS_ERR(fid)) { 400 if (IS_ERR(fid)) {
381 retval = PTR_ERR(fid); 401 retval = PTR_ERR(fid);
@@ -387,7 +407,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
387 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE) 407 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE)
388 fid->uid = v9ses->uid; 408 fid->uid = v9ses->uid;
389 else 409 else
390 fid->uid = ~0; 410 fid->uid = INVALID_UID;
391 411
392#ifdef CONFIG_9P_FSCACHE 412#ifdef CONFIG_9P_FSCACHE
393 /* register the session for caching */ 413 /* register the session for caching */
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 34c59f14a1c9..a8e127c89627 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -109,9 +109,9 @@ struct v9fs_session_info {
109 char *uname; /* user name to mount as */ 109 char *uname; /* user name to mount as */
110 char *aname; /* name of remote hierarchy being mounted */ 110 char *aname; /* name of remote hierarchy being mounted */
111 unsigned int maxdata; /* max data for client interface */ 111 unsigned int maxdata; /* max data for client interface */
112 unsigned int dfltuid; /* default uid/muid for legacy support */ 112 kuid_t dfltuid; /* default uid/muid for legacy support */
113 unsigned int dfltgid; /* default gid for legacy support */ 113 kgid_t dfltgid; /* default gid for legacy support */
114 u32 uid; /* if ACCESS_SINGLE, the uid that has access */ 114 kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */
115 struct p9_client *clnt; /* 9p client */ 115 struct p9_client *clnt; /* 9p client */
116 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
117 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
@@ -165,8 +165,8 @@ extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
165#define V9FS_PORT 564 165#define V9FS_PORT 564
166#define V9FS_DEFUSER "nobody" 166#define V9FS_DEFUSER "nobody"
167#define V9FS_DEFANAME "" 167#define V9FS_DEFANAME ""
168#define V9FS_DEFUID (-2) 168#define V9FS_DEFUID KUIDT_INIT(-2)
169#define V9FS_DEFGID (-2) 169#define V9FS_DEFGID KGIDT_INIT(-2)
170 170
171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) 171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
172{ 172{
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 64600b5d0522..9ad68628522c 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -137,6 +137,7 @@ out_valid:
137 137
138const struct dentry_operations v9fs_cached_dentry_operations = { 138const struct dentry_operations v9fs_cached_dentry_operations = {
139 .d_revalidate = v9fs_lookup_revalidate, 139 .d_revalidate = v9fs_lookup_revalidate,
140 .d_weak_revalidate = v9fs_lookup_revalidate,
140 .d_delete = v9fs_cached_dentry_delete, 141 .d_delete = v9fs_cached_dentry_delete,
141 .d_release = v9fs_dentry_release, 142 .d_release = v9fs_dentry_release,
142}; 143};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
52 */ 52 */
53 53
54struct p9_rdir { 54struct p9_rdir {
55 struct mutex mutex;
56 int head; 55 int head;
57 int tail; 56 int tail;
58 uint8_t *buf; 57 uint8_t buf[];
59}; 58};
60 59
61/** 60/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
93 * 92 *
94 */ 93 */
95 94
96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) 95static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
97{ 96{
98 struct p9_rdir *rdir; 97 struct p9_fid *fid = filp->private_data;
99 struct p9_fid *fid; 98 if (!fid->rdir)
100 int err = 0; 99 fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
101 100 return fid->rdir;
102 fid = filp->private_data;
103 if (!fid->rdir) {
104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
105
106 if (rdir == NULL) {
107 err = -ENOMEM;
108 goto exit;
109 }
110 spin_lock(&filp->f_dentry->d_lock);
111 if (!fid->rdir) {
112 rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
113 mutex_init(&rdir->mutex);
114 rdir->head = rdir->tail = 0;
115 fid->rdir = (void *) rdir;
116 rdir = NULL;
117 }
118 spin_unlock(&filp->f_dentry->d_lock);
119 kfree(rdir);
120 }
121exit:
122 return err;
123} 101}
124 102
125/** 103/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
145 123
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 124 buflen = fid->clnt->msize - P9_IOHDRSZ;
147 125
148 err = v9fs_alloc_rdir_buf(filp, buflen); 126 rdir = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err) 127 if (!rdir)
150 goto exit; 128 return -ENOMEM;
151 rdir = (struct p9_rdir *) fid->rdir;
152 129
153 err = mutex_lock_interruptible(&rdir->mutex); 130 while (1) {
154 if (err)
155 return err;
156 while (err == 0) {
157 if (rdir->tail == rdir->head) { 131 if (rdir->tail == rdir->head) {
158 err = v9fs_file_readn(filp, rdir->buf, NULL, 132 err = v9fs_file_readn(filp, rdir->buf, NULL,
159 buflen, filp->f_pos); 133 buflen, filp->f_pos);
160 if (err <= 0) 134 if (err <= 0)
161 goto unlock_and_exit; 135 return err;
162 136
163 rdir->head = 0; 137 rdir->head = 0;
164 rdir->tail = err; 138 rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 rdir->tail - rdir->head, &st); 143 rdir->tail - rdir->head, &st);
170 if (err) { 144 if (err) {
171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 145 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO;
173 p9stat_free(&st); 146 p9stat_free(&st);
174 goto unlock_and_exit; 147 return -EIO;
175 } 148 }
176 reclen = st.size+2; 149 reclen = st.size+2;
177 150
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 153
181 p9stat_free(&st); 154 p9stat_free(&st);
182 155
183 if (over) { 156 if (over)
184 err = 0; 157 return 0;
185 goto unlock_and_exit; 158
186 }
187 rdir->head += reclen; 159 rdir->head += reclen;
188 filp->f_pos += reclen; 160 filp->f_pos += reclen;
189 } 161 }
190 } 162 }
191
192unlock_and_exit:
193 mutex_unlock(&rdir->mutex);
194exit:
195 return err;
196} 163}
197 164
198/** 165/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
218 185
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 186 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
220 187
221 err = v9fs_alloc_rdir_buf(filp, buflen); 188 rdir = v9fs_alloc_rdir_buf(filp, buflen);
222 if (err) 189 if (!rdir)
223 goto exit; 190 return -ENOMEM;
224 rdir = (struct p9_rdir *) fid->rdir;
225 191
226 err = mutex_lock_interruptible(&rdir->mutex); 192 while (1) {
227 if (err)
228 return err;
229
230 while (err == 0) {
231 if (rdir->tail == rdir->head) { 193 if (rdir->tail == rdir->head) {
232 err = p9_client_readdir(fid, rdir->buf, buflen, 194 err = p9_client_readdir(fid, rdir->buf, buflen,
233 filp->f_pos); 195 filp->f_pos);
234 if (err <= 0) 196 if (err <= 0)
235 goto unlock_and_exit; 197 return err;
236 198
237 rdir->head = 0; 199 rdir->head = 0;
238 rdir->tail = err; 200 rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
245 &curdirent); 207 &curdirent);
246 if (err < 0) { 208 if (err < 0) {
247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 209 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 210 return -EIO;
249 goto unlock_and_exit;
250 } 211 }
251 212
252 /* d_off in dirent structure tracks the offset into 213 /* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
261 curdirent.d_type); 222 curdirent.d_type);
262 oldoffset = curdirent.d_off; 223 oldoffset = curdirent.d_off;
263 224
264 if (over) { 225 if (over)
265 err = 0; 226 return 0;
266 goto unlock_and_exit;
267 }
268 227
269 filp->f_pos = curdirent.d_off; 228 filp->f_pos = curdirent.d_off;
270 rdir->head += err; 229 rdir->head += err;
271 } 230 }
272 } 231 }
273
274unlock_and_exit:
275 mutex_unlock(&rdir->mutex);
276exit:
277 return err;
278} 232}
279 233
280 234
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..d384a8b77ee8 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
80 p9_client_clunk(fid); 80 p9_client_clunk(fid);
81 return err; 81 return err;
82 } 82 }
83 if (file->f_flags & O_TRUNC) {
84 i_size_write(inode, 0);
85 inode->i_blocks = 0;
86 }
87 if ((file->f_flags & O_APPEND) && 83 if ((file->f_flags & O_APPEND) &&
88 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) 84 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
89 generic_file_llseek(file, 0, SEEK_END); 85 generic_file_llseek(file, 0, SEEK_END);
@@ -133,7 +129,7 @@ out_error:
133static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) 129static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
134{ 130{
135 int res = 0; 131 int res = 0;
136 struct inode *inode = filp->f_path.dentry->d_inode; 132 struct inode *inode = file_inode(filp);
137 133
138 p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 134 p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
139 135
@@ -302,7 +298,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
302 298
303static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) 299static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
304{ 300{
305 struct inode *inode = filp->f_path.dentry->d_inode; 301 struct inode *inode = file_inode(filp);
306 int ret = -ENOLCK; 302 int ret = -ENOLCK;
307 303
308 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", 304 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -338,7 +334,7 @@ out_err:
338static int v9fs_file_flock_dotl(struct file *filp, int cmd, 334static int v9fs_file_flock_dotl(struct file *filp, int cmd,
339 struct file_lock *fl) 335 struct file_lock *fl)
340{ 336{
341 struct inode *inode = filp->f_path.dentry->d_inode; 337 struct inode *inode = file_inode(filp);
342 int ret = -ENOLCK; 338 int ret = -ENOLCK;
343 339
344 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", 340 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -529,7 +525,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
529 if (!count) 525 if (!count)
530 goto out; 526 goto out;
531 527
532 retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode, 528 retval = v9fs_file_write_internal(file_inode(filp),
533 filp->private_data, 529 filp->private_data,
534 data, count, &origin, 1); 530 data, count, &origin, 1);
535 /* update offset on successful write */ 531 /* update offset on successful write */
@@ -604,7 +600,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
604 struct v9fs_inode *v9inode; 600 struct v9fs_inode *v9inode;
605 struct page *page = vmf->page; 601 struct page *page = vmf->page;
606 struct file *filp = vma->vm_file; 602 struct file *filp = vma->vm_file;
607 struct inode *inode = filp->f_path.dentry->d_inode; 603 struct inode *inode = file_inode(filp);
608 604
609 605
610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", 606 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
@@ -620,6 +616,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
620 lock_page(page); 616 lock_page(page);
621 if (page->mapping != inode->i_mapping) 617 if (page->mapping != inode->i_mapping)
622 goto out_unlock; 618 goto out_unlock;
619 wait_for_stable_page(page);
623 620
624 return VM_FAULT_LOCKED; 621 return VM_FAULT_LOCKED;
625out_unlock: 622out_unlock:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 890bed538f9b..b5340c829de1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
192 break; 192 break;
193 } 193 }
194 194
195 if (uflags & O_TRUNC)
196 ret |= P9_OTRUNC;
197
198 if (extended) { 195 if (extended) {
199 if (uflags & O_EXCL) 196 if (uflags & O_EXCL)
200 ret |= P9_OEXCL; 197 ret |= P9_OEXCL;
@@ -228,9 +225,9 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
228 wstat->uid = NULL; 225 wstat->uid = NULL;
229 wstat->gid = NULL; 226 wstat->gid = NULL;
230 wstat->muid = NULL; 227 wstat->muid = NULL;
231 wstat->n_uid = ~0; 228 wstat->n_uid = INVALID_UID;
232 wstat->n_gid = ~0; 229 wstat->n_gid = INVALID_GID;
233 wstat->n_muid = ~0; 230 wstat->n_muid = INVALID_UID;
234 wstat->extension = NULL; 231 wstat->extension = NULL;
235} 232}
236 233
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 40895546e103..61e4fa70a6fa 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -57,7 +57,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
57 * group of the new file system object. 57 * group of the new file system object.
58 */ 58 */
59 59
60static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) 60static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
61{ 61{
62 BUG_ON(dir_inode == NULL); 62 BUG_ON(dir_inode == NULL);
63 63
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
186 { O_CREAT, P9_DOTL_CREATE }, 186 { O_CREAT, P9_DOTL_CREATE },
187 { O_EXCL, P9_DOTL_EXCL }, 187 { O_EXCL, P9_DOTL_EXCL },
188 { O_NOCTTY, P9_DOTL_NOCTTY }, 188 { O_NOCTTY, P9_DOTL_NOCTTY },
189 { O_TRUNC, P9_DOTL_TRUNC },
190 { O_APPEND, P9_DOTL_APPEND }, 189 { O_APPEND, P9_DOTL_APPEND },
191 { O_NONBLOCK, P9_DOTL_NONBLOCK }, 190 { O_NONBLOCK, P9_DOTL_NONBLOCK },
192 { O_DSYNC, P9_DOTL_DSYNC }, 191 { O_DSYNC, P9_DOTL_DSYNC },
@@ -246,7 +245,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
246 int *opened) 245 int *opened)
247{ 246{
248 int err = 0; 247 int err = 0;
249 gid_t gid; 248 kgid_t gid;
250 umode_t mode; 249 umode_t mode;
251 char *name = NULL; 250 char *name = NULL;
252 struct p9_qid qid; 251 struct p9_qid qid;
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
268 } 267 }
269 268
270 /* Only creates */ 269 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode) 270 if (!(flags & O_CREAT))
272 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
273 278
274 v9ses = v9fs_inode2v9ses(dir); 279 v9ses = v9fs_inode2v9ses(dir);
275 280
@@ -325,14 +330,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
325 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); 330 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
326 goto error; 331 goto error;
327 } 332 }
333 /* Now set the ACL based on the default value */
334 v9fs_set_create_acl(inode, fid, dacl, pacl);
335
328 err = v9fs_fid_add(dentry, fid); 336 err = v9fs_fid_add(dentry, fid);
329 if (err < 0) 337 if (err < 0)
330 goto error; 338 goto error;
331 d_instantiate(dentry, inode); 339 d_instantiate(dentry, inode);
332 340
333 /* Now set the ACL based on the default value */
334 v9fs_set_create_acl(dentry, &dacl, &pacl);
335
336 v9inode = V9FS_I(inode); 341 v9inode = V9FS_I(inode);
337 mutex_lock(&v9inode->v_mutex); 342 mutex_lock(&v9inode->v_mutex);
338 if (v9ses->cache && !v9inode->writeback_fid && 343 if (v9ses->cache && !v9inode->writeback_fid &&
@@ -364,6 +369,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
364#endif 369#endif
365 *opened |= FILE_CREATED; 370 *opened |= FILE_CREATED;
366out: 371out:
372 v9fs_put_acl(dacl, pacl);
367 dput(res); 373 dput(res);
368 return err; 374 return err;
369 375
@@ -373,7 +379,6 @@ error:
373err_clunk_old_fid: 379err_clunk_old_fid:
374 if (ofid) 380 if (ofid)
375 p9_client_clunk(ofid); 381 p9_client_clunk(ofid);
376 v9fs_set_create_acl(NULL, &dacl, &pacl);
377 goto out; 382 goto out;
378} 383}
379 384
@@ -391,7 +396,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
391 int err; 396 int err;
392 struct v9fs_session_info *v9ses; 397 struct v9fs_session_info *v9ses;
393 struct p9_fid *fid = NULL, *dfid = NULL; 398 struct p9_fid *fid = NULL, *dfid = NULL;
394 gid_t gid; 399 kgid_t gid;
395 char *name; 400 char *name;
396 umode_t mode; 401 umode_t mode;
397 struct inode *inode; 402 struct inode *inode;
@@ -430,17 +435,17 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
430 if (err < 0) 435 if (err < 0)
431 goto error; 436 goto error;
432 437
438 fid = p9_client_walk(dfid, 1, &name, 1);
439 if (IS_ERR(fid)) {
440 err = PTR_ERR(fid);
441 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
442 err);
443 fid = NULL;
444 goto error;
445 }
446
433 /* instantiate inode and assign the unopened fid to the dentry */ 447 /* instantiate inode and assign the unopened fid to the dentry */
434 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 448 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
435 fid = p9_client_walk(dfid, 1, &name, 1);
436 if (IS_ERR(fid)) {
437 err = PTR_ERR(fid);
438 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
439 err);
440 fid = NULL;
441 goto error;
442 }
443
444 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 449 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
445 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
446 err = PTR_ERR(inode); 451 err = PTR_ERR(inode);
@@ -451,6 +456,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
451 err = v9fs_fid_add(dentry, fid); 456 err = v9fs_fid_add(dentry, fid);
452 if (err < 0) 457 if (err < 0)
453 goto error; 458 goto error;
459 v9fs_set_create_acl(inode, fid, dacl, pacl);
454 d_instantiate(dentry, inode); 460 d_instantiate(dentry, inode);
455 fid = NULL; 461 fid = NULL;
456 } else { 462 } else {
@@ -464,16 +470,15 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
464 err = PTR_ERR(inode); 470 err = PTR_ERR(inode);
465 goto error; 471 goto error;
466 } 472 }
473 v9fs_set_create_acl(inode, fid, dacl, pacl);
467 d_instantiate(dentry, inode); 474 d_instantiate(dentry, inode);
468 } 475 }
469 /* Now set the ACL based on the default value */
470 v9fs_set_create_acl(dentry, &dacl, &pacl);
471 inc_nlink(dir); 476 inc_nlink(dir);
472 v9fs_invalidate_inode_attr(dir); 477 v9fs_invalidate_inode_attr(dir);
473error: 478error:
474 if (fid) 479 if (fid)
475 p9_client_clunk(fid); 480 p9_client_clunk(fid);
476 v9fs_set_create_acl(NULL, &dacl, &pacl); 481 v9fs_put_acl(dacl, pacl);
477 return err; 482 return err;
478} 483}
479 484
@@ -567,10 +572,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
567 struct v9fs_session_info *v9ses; 572 struct v9fs_session_info *v9ses;
568 struct p9_fid *fid; 573 struct p9_fid *fid;
569 struct p9_iattr_dotl p9attr; 574 struct p9_iattr_dotl p9attr;
575 struct inode *inode = dentry->d_inode;
570 576
571 p9_debug(P9_DEBUG_VFS, "\n"); 577 p9_debug(P9_DEBUG_VFS, "\n");
572 578
573 retval = inode_change_ok(dentry->d_inode, iattr); 579 retval = inode_change_ok(inode, iattr);
574 if (retval) 580 if (retval)
575 return retval; 581 return retval;
576 582
@@ -591,23 +597,23 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
591 return PTR_ERR(fid); 597 return PTR_ERR(fid);
592 598
593 /* Write all dirty data */ 599 /* Write all dirty data */
594 if (S_ISREG(dentry->d_inode->i_mode)) 600 if (S_ISREG(inode->i_mode))
595 filemap_write_and_wait(dentry->d_inode->i_mapping); 601 filemap_write_and_wait(inode->i_mapping);
596 602
597 retval = p9_client_setattr(fid, &p9attr); 603 retval = p9_client_setattr(fid, &p9attr);
598 if (retval < 0) 604 if (retval < 0)
599 return retval; 605 return retval;
600 606
601 if ((iattr->ia_valid & ATTR_SIZE) && 607 if ((iattr->ia_valid & ATTR_SIZE) &&
602 iattr->ia_size != i_size_read(dentry->d_inode)) 608 iattr->ia_size != i_size_read(inode))
603 truncate_setsize(dentry->d_inode, iattr->ia_size); 609 truncate_setsize(inode, iattr->ia_size);
604 610
605 v9fs_invalidate_inode_attr(dentry->d_inode); 611 v9fs_invalidate_inode_attr(inode);
606 setattr_copy(dentry->d_inode, iattr); 612 setattr_copy(inode, iattr);
607 mark_inode_dirty(dentry->d_inode); 613 mark_inode_dirty(inode);
608 if (iattr->ia_valid & ATTR_MODE) { 614 if (iattr->ia_valid & ATTR_MODE) {
609 /* We also want to update ACL when we update mode bits */ 615 /* We also want to update ACL when we update mode bits */
610 retval = v9fs_acl_chmod(dentry); 616 retval = v9fs_acl_chmod(inode, fid);
611 if (retval < 0) 617 if (retval < 0)
612 return retval; 618 return retval;
613 } 619 }
@@ -692,7 +698,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
692 const char *symname) 698 const char *symname)
693{ 699{
694 int err; 700 int err;
695 gid_t gid; 701 kgid_t gid;
696 char *name; 702 char *name;
697 struct p9_qid qid; 703 struct p9_qid qid;
698 struct inode *inode; 704 struct inode *inode;
@@ -832,7 +838,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
832 dev_t rdev) 838 dev_t rdev)
833{ 839{
834 int err; 840 int err;
835 gid_t gid; 841 kgid_t gid;
836 char *name; 842 char *name;
837 umode_t mode; 843 umode_t mode;
838 struct v9fs_session_info *v9ses; 844 struct v9fs_session_info *v9ses;
@@ -875,17 +881,17 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
875 goto error; 881 goto error;
876 882
877 v9fs_invalidate_inode_attr(dir); 883 v9fs_invalidate_inode_attr(dir);
884 fid = p9_client_walk(dfid, 1, &name, 1);
885 if (IS_ERR(fid)) {
886 err = PTR_ERR(fid);
887 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
888 err);
889 fid = NULL;
890 goto error;
891 }
892
878 /* instantiate inode and assign the unopened fid to the dentry */ 893 /* instantiate inode and assign the unopened fid to the dentry */
879 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 894 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
880 fid = p9_client_walk(dfid, 1, &name, 1);
881 if (IS_ERR(fid)) {
882 err = PTR_ERR(fid);
883 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
884 err);
885 fid = NULL;
886 goto error;
887 }
888
889 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 895 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
890 if (IS_ERR(inode)) { 896 if (IS_ERR(inode)) {
891 err = PTR_ERR(inode); 897 err = PTR_ERR(inode);
@@ -893,6 +899,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
893 err); 899 err);
894 goto error; 900 goto error;
895 } 901 }
902 v9fs_set_create_acl(inode, fid, dacl, pacl);
896 err = v9fs_fid_add(dentry, fid); 903 err = v9fs_fid_add(dentry, fid);
897 if (err < 0) 904 if (err < 0)
898 goto error; 905 goto error;
@@ -908,14 +915,13 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
908 err = PTR_ERR(inode); 915 err = PTR_ERR(inode);
909 goto error; 916 goto error;
910 } 917 }
918 v9fs_set_create_acl(inode, fid, dacl, pacl);
911 d_instantiate(dentry, inode); 919 d_instantiate(dentry, inode);
912 } 920 }
913 /* Now set the ACL based on the default value */
914 v9fs_set_create_acl(dentry, &dacl, &pacl);
915error: 921error:
916 if (fid) 922 if (fid)
917 p9_client_clunk(fid); 923 p9_client_clunk(fid);
918 v9fs_set_create_acl(NULL, &dacl, &pacl); 924 v9fs_put_acl(dacl, pacl);
919 return err; 925 return err;
920} 926}
921 927
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 137d50396898..91dad63e5a2d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -363,5 +363,5 @@ struct file_system_type v9fs_fs_type = {
363 .mount = v9fs_mount, 363 .mount = v9fs_mount,
364 .kill_sb = v9fs_kill_super, 364 .kill_sb = v9fs_kill_super,
365 .owner = THIS_MODULE, 365 .owner = THIS_MODULE,
366 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT, 366 .fs_flags = FS_RENAME_DOES_D_MOVE,
367}; 367};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 29653b70a9c3..c45e016b190f 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -111,19 +111,26 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
111int v9fs_xattr_set(struct dentry *dentry, const char *name, 111int v9fs_xattr_set(struct dentry *dentry, const char *name,
112 const void *value, size_t value_len, int flags) 112 const void *value, size_t value_len, int flags)
113{ 113{
114 struct p9_fid *fid = v9fs_fid_lookup(dentry);
115 if (IS_ERR(fid))
116 return PTR_ERR(fid);
117 return v9fs_fid_xattr_set(fid, name, value, value_len, flags);
118}
119
120int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
121 const void *value, size_t value_len, int flags)
122{
114 u64 offset = 0; 123 u64 offset = 0;
115 int retval, msize, write_count; 124 int retval, msize, write_count;
116 struct p9_fid *fid = NULL;
117 125
118 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", 126 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
119 name, value_len, flags); 127 name, value_len, flags);
120 128
121 fid = v9fs_fid_clone(dentry); 129 /* Clone it */
122 if (IS_ERR(fid)) { 130 fid = p9_client_walk(fid, 0, NULL, 1);
123 retval = PTR_ERR(fid); 131 if (IS_ERR(fid))
124 fid = NULL; 132 return PTR_ERR(fid);
125 goto error; 133
126 }
127 /* 134 /*
128 * On success fid points to xattr 135 * On success fid points to xattr
129 */ 136 */
@@ -131,7 +138,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
131 if (retval < 0) { 138 if (retval < 0) {
132 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", 139 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
133 retval); 140 retval);
134 goto error; 141 p9_client_clunk(fid);
142 return retval;
135 } 143 }
136 msize = fid->clnt->msize; 144 msize = fid->clnt->msize;
137 while (value_len) { 145 while (value_len) {
@@ -144,17 +152,12 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
144 if (write_count < 0) { 152 if (write_count < 0) {
145 /* error in xattr write */ 153 /* error in xattr write */
146 retval = write_count; 154 retval = write_count;
147 goto error; 155 break;
148 } 156 }
149 offset += write_count; 157 offset += write_count;
150 value_len -= write_count; 158 value_len -= write_count;
151 } 159 }
152 /* Total read xattr bytes */ 160 return p9_client_clunk(fid);
153 retval = offset;
154error:
155 if (fid)
156 retval = p9_client_clunk(fid);
157 return retval;
158} 161}
159 162
160ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 163ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index eaa837c53bd5..eec348a3df71 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -27,6 +27,8 @@ extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
27 void *, size_t); 27 void *, size_t);
28extern ssize_t v9fs_xattr_get(struct dentry *, const char *, 28extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
29 void *, size_t); 29 void *, size_t);
30extern int v9fs_fid_xattr_set(struct p9_fid *, const char *,
31 const void *, size_t, int);
30extern int v9fs_xattr_set(struct dentry *, const char *, 32extern int v9fs_xattr_set(struct dentry *, const char *,
31 const void *, size_t, int); 33 const void *, size_t, int);
32extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t); 34extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/Kconfig b/fs/Kconfig
index cfe512fd1caf..780725a463b1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,16 +68,6 @@ source "fs/quota/Kconfig"
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70 70
71config CUSE
72 tristate "Character device in Userspace support"
73 depends on FUSE_FS
74 help
75 This FUSE extension allows character devices to be
76 implemented in userspace.
77
78 If you want to develop or use userspace character device
79 based on CUSE, answer Y or M.
80
81config GENERIC_ACL 71config GENERIC_ACL
82 bool 72 bool
83 select FS_POSIX_ACL 73 select FS_POSIX_ACL
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index e55182a74605..c5a7787dd5e9 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,6 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
6 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index b3be2e7c5643..9cf874ce8336 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -19,7 +19,7 @@ static DEFINE_RWLOCK(adfs_dir_lock);
19static int 19static int
20adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 20adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
21{ 21{
22 struct inode *inode = filp->f_path.dentry->d_inode; 22 struct inode *inode = file_inode(filp);
23 struct super_block *sb = inode->i_sb; 23 struct super_block *sb = inode->i_sb;
24 struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; 24 struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
25 struct object_info obj; 25 struct object_info obj;
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index cfad9afb4762..a04d9e848d05 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -1,6 +1,6 @@
1config AFFS_FS 1config AFFS_FS
2 tristate "Amiga FFS file system support (EXPERIMENTAL)" 2 tristate "Amiga FFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Fast File System (FFS) is the common file system used on hard 5 The Fast File System (FFS) is the common file system used on hard
6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index eb82ee53ee0b..d9a43674cb94 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,9 +125,8 @@ static void
125affs_fix_dcache(struct inode *inode, u32 entry_ino) 125affs_fix_dcache(struct inode *inode, u32 entry_ino)
126{ 126{
127 struct dentry *dentry; 127 struct dentry *dentry;
128 struct hlist_node *p;
129 spin_lock(&inode->i_lock); 128 spin_lock(&inode->i_lock);
130 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 129 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
131 if (entry_ino == (u32)(long)dentry->d_fsdata) { 130 if (entry_ino == (u32)(long)dentry->d_fsdata) {
132 dentry->d_fsdata = (void *)inode->i_ino; 131 dentry->d_fsdata = (void *)inode->i_ino;
133 break; 132 break;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 8ca8f3a55599..fd11a6d608ee 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -42,7 +42,7 @@ const struct inode_operations affs_dir_inode_operations = {
42static int 42static int
43affs_readdir(struct file *filp, void *dirent, filldir_t filldir) 43affs_readdir(struct file *filp, void *dirent, filldir_t filldir)
44{ 44{
45 struct inode *inode = filp->f_path.dentry->d_inode; 45 struct inode *inode = file_inode(filp);
46 struct super_block *sb = inode->i_sb; 46 struct super_block *sb = inode->i_sb;
47 struct buffer_head *dir_bh; 47 struct buffer_head *dir_bh;
48 struct buffer_head *fh_bh; 48 struct buffer_head *fh_bh;
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 8f975f25b486..ebba3b18e5da 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,6 +1,6 @@
1config AFS_FS 1config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS)"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER 5 select DNS_RESOLVER
6 help 6 help
@@ -22,8 +22,7 @@ config AFS_DEBUG
22 If unsure, say N. 22 If unsure, say N.
23 23
24config AFS_FSCACHE 24config AFS_FSCACHE
25 bool "Provide AFS client caching support (EXPERIMENTAL)" 25 bool "Provide AFS client caching support"
26 depends on EXPERIMENTAL
27 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y 26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
28 help 27 help
29 Say Y here if you want AFS data to be cached locally on disk through 28 Say Y here if you want AFS data to be cached locally on disk through
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index c548aa346f0d..3c462ff6db63 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -119,8 +119,8 @@ struct afs_file_status {
119 u64 size; /* file size */ 119 u64 size; /* file size */
120 afs_dataversion_t data_version; /* current data version */ 120 afs_dataversion_t data_version; /* current data version */
121 u32 author; /* author ID */ 121 u32 author; /* author ID */
122 u32 owner; /* owner ID */ 122 kuid_t owner; /* owner ID */
123 u32 group; /* group ID */ 123 kgid_t group; /* group ID */
124 afs_access_t caller_access; /* access rights for authenticated caller */ 124 afs_access_t caller_access; /* access rights for authenticated caller */
125 afs_access_t anon_access; /* access rights for unauthenticated caller */ 125 afs_access_t anon_access; /* access rights for unauthenticated caller */
126 umode_t mode; /* UNIX mode */ 126 umode_t mode; /* UNIX mode */
@@ -133,13 +133,6 @@ struct afs_file_status {
133/* 133/*
134 * AFS file status change request 134 * AFS file status change request
135 */ 135 */
136struct afs_store_status {
137 u32 mask; /* which bits of the struct are set */
138 u32 mtime_client; /* last time client changed data */
139 u32 owner; /* owner ID */
140 u32 group; /* group ID */
141 umode_t mode; /* UNIX mode */
142};
143 136
144#define AFS_SET_MTIME 0x01 /* set the mtime */ 137#define AFS_SET_MTIME 0x01 /* set the mtime */
145#define AFS_SET_OWNER 0x02 /* set the owner ID */ 138#define AFS_SET_OWNER 0x02 /* set the owner ID */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index db477906ba4f..7a465ed04444 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -393,12 +393,12 @@ static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
393 int ret; 393 int ret;
394 394
395 _enter("{%Ld,{%lu}}", 395 _enter("{%Ld,{%lu}}",
396 file->f_pos, file->f_path.dentry->d_inode->i_ino); 396 file->f_pos, file_inode(file)->i_ino);
397 397
398 ASSERT(file->private_data != NULL); 398 ASSERT(file->private_data != NULL);
399 399
400 fpos = file->f_pos; 400 fpos = file->f_pos;
401 ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, 401 ret = afs_dir_iterate(file_inode(file), &fpos,
402 cookie, filldir, file->private_data); 402 cookie, filldir, file->private_data);
403 file->f_pos = fpos; 403 file->f_pos = fpos;
404 404
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 757d664575dd..2497bf306c70 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -514,7 +514,7 @@ error:
514 */ 514 */
515int afs_lock(struct file *file, int cmd, struct file_lock *fl) 515int afs_lock(struct file *file, int cmd, struct file_lock *fl)
516{ 516{
517 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 517 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
518 518
519 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", 519 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
520 vnode->fid.vid, vnode->fid.vnode, cmd, 520 vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -537,7 +537,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
537 */ 537 */
538int afs_flock(struct file *file, int cmd, struct file_lock *fl) 538int afs_flock(struct file *file, int cmd, struct file_lock *fl)
539{ 539{
540 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 540 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
541 541
542 _enter("{%x:%u},%d,{t=%x,fl=%x}", 542 _enter("{%x:%u},%d,{t=%x,fl=%x}",
543 vnode->fid.vid, vnode->fid.vnode, cmd, 543 vnode->fid.vid, vnode->fid.vnode, cmd,
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index b960ff05ea0b..c2e930ec2888 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -42,6 +42,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
42 umode_t mode; 42 umode_t mode;
43 u64 data_version, size; 43 u64 data_version, size;
44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ 44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
45 kuid_t owner;
46 kgid_t group;
45 47
46#define EXTRACT(DST) \ 48#define EXTRACT(DST) \
47 do { \ 49 do { \
@@ -56,7 +58,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
56 size = ntohl(*bp++); 58 size = ntohl(*bp++);
57 data_version = ntohl(*bp++); 59 data_version = ntohl(*bp++);
58 EXTRACT(status->author); 60 EXTRACT(status->author);
59 EXTRACT(status->owner); 61 owner = make_kuid(&init_user_ns, ntohl(*bp++));
62 changed |= !uid_eq(owner, status->owner);
63 status->owner = owner;
60 EXTRACT(status->caller_access); /* call ticket dependent */ 64 EXTRACT(status->caller_access); /* call ticket dependent */
61 EXTRACT(status->anon_access); 65 EXTRACT(status->anon_access);
62 EXTRACT(status->mode); 66 EXTRACT(status->mode);
@@ -65,7 +69,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
65 bp++; /* seg size */ 69 bp++; /* seg size */
66 status->mtime_client = ntohl(*bp++); 70 status->mtime_client = ntohl(*bp++);
67 status->mtime_server = ntohl(*bp++); 71 status->mtime_server = ntohl(*bp++);
68 EXTRACT(status->group); 72 group = make_kgid(&init_user_ns, ntohl(*bp++));
73 changed |= !gid_eq(group, status->group);
74 status->group = group;
69 bp++; /* sync counter */ 75 bp++; /* sync counter */
70 data_version |= (u64) ntohl(*bp++) << 32; 76 data_version |= (u64) ntohl(*bp++) << 32;
71 EXTRACT(status->lock_count); 77 EXTRACT(status->lock_count);
@@ -181,12 +187,12 @@ static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
181 187
182 if (attr->ia_valid & ATTR_UID) { 188 if (attr->ia_valid & ATTR_UID) {
183 mask |= AFS_SET_OWNER; 189 mask |= AFS_SET_OWNER;
184 owner = attr->ia_uid; 190 owner = from_kuid(&init_user_ns, attr->ia_uid);
185 } 191 }
186 192
187 if (attr->ia_valid & ATTR_GID) { 193 if (attr->ia_valid & ATTR_GID) {
188 mask |= AFS_SET_GROUP; 194 mask |= AFS_SET_GROUP;
189 group = attr->ia_gid; 195 group = from_kgid(&init_user_ns, attr->ia_gid);
190 } 196 }
191 197
192 if (attr->ia_valid & ATTR_MODE) { 198 if (attr->ia_valid & ATTR_MODE) {
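The AFS status fields move from raw u32 owner/group IDs to kuid_t/kgid_t above, while the wire format stays a plain 32-bit integer: the decode path converts with make_kuid()/make_kgid() against &init_user_ns, and the encode path converts back with from_kuid()/from_kgid(). A minimal sketch of that round trip, with illustrative values rather than code taken from the patch:

	u32 wire_uid = 1000;					/* ID as carried on the wire */
	kuid_t owner = make_kuid(&init_user_ns, wire_uid);	/* wire value -> kernel-internal kuid_t */
	u32 back     = from_kuid(&init_user_ns, owner);		/* kernel-internal kuid_t -> wire value */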
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 95cffd38239f..789bc253b5f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -69,7 +69,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
69 69
70 set_nlink(inode, vnode->status.nlink); 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = GLOBAL_ROOT_GID;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
74 inode->i_ctime.tv_sec = vnode->status.mtime_server; 74 inode->i_ctime.tv_sec = vnode->status.mtime_server;
75 inode->i_ctime.tv_nsec = 0; 75 inode->i_ctime.tv_nsec = 0;
@@ -175,8 +175,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 set_nlink(inode, 2); 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = GLOBAL_ROOT_UID;
179 inode->i_gid = 0; 179 inode->i_gid = GLOBAL_ROOT_GID;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
181 inode->i_ctime.tv_nsec = 0; 181 inode->i_ctime.tv_nsec = 0;
182 inode->i_atime = inode->i_mtime = inode->i_ctime; 182 inode->i_atime = inode->i_mtime = inode->i_ctime;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 43165009428d..7c31ec399575 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -24,6 +24,8 @@
24#include <linux/parser.h> 24#include <linux/parser.h>
25#include <linux/statfs.h> 25#include <linux/statfs.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/nsproxy.h>
28#include <net/net_namespace.h>
27#include "internal.h" 29#include "internal.h"
28 30
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 31#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -363,6 +365,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
363 365
364 memset(&params, 0, sizeof(params)); 366 memset(&params, 0, sizeof(params));
365 367
368 ret = -EINVAL;
369 if (current->nsproxy->net_ns != &init_net)
370 goto error;
371
366 /* parse the options and device name */ 372 /* parse the options and device name */
367 if (options) { 373 if (options) {
368 ret = afs_parse_options(&params, options, &dev_name); 374 ret = afs_parse_options(&params, options, &dev_name);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9aa52d93c73c..7e03eadb40c0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,7 +120,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
120 struct page **pagep, void **fsdata) 120 struct page **pagep, void **fsdata)
121{ 121{
122 struct afs_writeback *candidate, *wb; 122 struct afs_writeback *candidate, *wb;
123 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 123 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
124 struct page *page; 124 struct page *page;
125 struct key *key = file->private_data; 125 struct key *key = file->private_data;
126 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 126 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
@@ -245,7 +245,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
245 loff_t pos, unsigned len, unsigned copied, 245 loff_t pos, unsigned len, unsigned copied,
246 struct page *page, void *fsdata) 246 struct page *page, void *fsdata)
247{ 247{
248 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 248 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
249 loff_t i_size, maybe_i_size; 249 loff_t i_size, maybe_i_size;
250 250
251 _enter("{%x:%u},{%lx}", 251 _enter("{%x:%u},{%lx}",
@@ -627,8 +627,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
627ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, 627ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
628 unsigned long nr_segs, loff_t pos) 628 unsigned long nr_segs, loff_t pos)
629{ 629{
630 struct dentry *dentry = iocb->ki_filp->f_path.dentry; 630 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
631 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
632 ssize_t result; 631 ssize_t result;
633 size_t count = iov_length(iov, nr_segs); 632 size_t count = iov_length(iov, nr_segs);
634 633
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..3f941f2a3059 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -101,7 +101,7 @@ static int aio_setup_ring(struct kioctx *ctx)
101 struct aio_ring *ring; 101 struct aio_ring *ring;
102 struct aio_ring_info *info = &ctx->ring_info; 102 struct aio_ring_info *info = &ctx->ring_info;
103 unsigned nr_events = ctx->max_reqs; 103 unsigned nr_events = ctx->max_reqs;
104 unsigned long size; 104 unsigned long size, populate;
105 int nr_pages; 105 int nr_pages;
106 106
107 /* Compensate for the ring buffer's head/tail overlap entry */ 107 /* Compensate for the ring buffer's head/tail overlap entry */
@@ -129,7 +129,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129 down_write(&ctx->mm->mmap_sem); 129 down_write(&ctx->mm->mmap_sem);
130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131 PROT_READ|PROT_WRITE, 131 PROT_READ|PROT_WRITE,
132 MAP_ANONYMOUS|MAP_PRIVATE, 0); 132 MAP_ANONYMOUS|MAP_PRIVATE, 0,
133 &populate);
133 if (IS_ERR((void *)info->mmap_base)) { 134 if (IS_ERR((void *)info->mmap_base)) {
134 up_write(&ctx->mm->mmap_sem); 135 up_write(&ctx->mm->mmap_sem);
135 info->mmap_size = 0; 136 info->mmap_size = 0;
@@ -147,6 +148,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147 aio_free_ring(ctx); 148 aio_free_ring(ctx);
148 return -EAGAIN; 149 return -EAGAIN;
149 } 150 }
151 if (populate)
152 mm_populate(info->mmap_base, populate);
150 153
151 ctx->user_id = info->mmap_base; 154 ctx->user_id = info->mmap_base;
152 155
@@ -588,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
588{ 591{
589 struct mm_struct *mm = current->mm; 592 struct mm_struct *mm = current->mm;
590 struct kioctx *ctx, *ret = NULL; 593 struct kioctx *ctx, *ret = NULL;
591 struct hlist_node *n;
592 594
593 rcu_read_lock(); 595 rcu_read_lock();
594 596
595 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 597 hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
596 /* 598 /*
597 * RCU protects us against accessing freed memory but 599 * RCU protects us against accessing freed memory but
598 * we have to be careful not to get a reference when the 600 * we have to be careful not to get a reference when the
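The lookup_ioctx() hunk above follows the tree-wide hlist iterator change in this series: the hlist_for_each_entry*() macros no longer take a separate struct hlist_node * cursor, only the entry pointer, the list head and the member name, which is why the local "n" disappears. Sketch of the call site before and after:

	struct kioctx *ctx;

	/* old form needed an extra cursor: hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) */
	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
		/* the entry pointer itself now acts as the cursor */
	}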
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 28d39fb84ae3..47a65df8c871 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -131,7 +131,6 @@ struct file *anon_inode_getfile(const char *name,
131 struct qstr this; 131 struct qstr this;
132 struct path path; 132 struct path path;
133 struct file *file; 133 struct file *file;
134 int error;
135 134
136 if (IS_ERR(anon_inode_inode)) 135 if (IS_ERR(anon_inode_inode))
137 return ERR_PTR(-ENODEV); 136 return ERR_PTR(-ENODEV);
@@ -143,7 +142,7 @@ struct file *anon_inode_getfile(const char *name,
143 * Link the inode to a directory entry by creating a unique name 142 * Link the inode to a directory entry by creating a unique name
144 * using the inode sequence number. 143 * using the inode sequence number.
145 */ 144 */
146 error = -ENOMEM; 145 file = ERR_PTR(-ENOMEM);
147 this.name = name; 146 this.name = name;
148 this.len = strlen(name); 147 this.len = strlen(name);
149 this.hash = 0; 148 this.hash = 0;
@@ -160,15 +159,12 @@ struct file *anon_inode_getfile(const char *name,
160 159
161 d_instantiate(path.dentry, anon_inode_inode); 160 d_instantiate(path.dentry, anon_inode_inode);
162 161
163 error = -ENFILE;
164 file = alloc_file(&path, OPEN_FMODE(flags), fops); 162 file = alloc_file(&path, OPEN_FMODE(flags), fops);
165 if (!file) 163 if (IS_ERR(file))
166 goto err_dput; 164 goto err_dput;
167 file->f_mapping = anon_inode_inode->i_mapping; 165 file->f_mapping = anon_inode_inode->i_mapping;
168 166
169 file->f_pos = 0;
170 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); 167 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
171 file->f_version = 0;
172 file->private_data = priv; 168 file->private_data = priv;
173 169
174 return file; 170 return file;
@@ -177,7 +173,7 @@ err_dput:
177 path_put(&path); 173 path_put(&path);
178err_module: 174err_module:
179 module_put(fops->owner); 175 module_put(fops->owner);
180 return ERR_PTR(error); 176 return file;
181} 177}
182EXPORT_SYMBOL_GPL(anon_inode_getfile); 178EXPORT_SYMBOL_GPL(anon_inode_getfile);
183 179
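anon_inode_getfile() now relies on alloc_file() reporting failure as an ERR_PTR-encoded pointer instead of NULL, so the local "error" variable goes away and the same pointer can be returned on both the success and failure paths. The calling convention, sketched:

	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (IS_ERR(file))	/* the error code is encoded in the pointer itself */
		goto err_dput;	/* and is propagated simply by returning that pointer */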
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b785e7707959..3f1128b37e46 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -273,7 +273,7 @@ static inline int autofs_prepare_pipe(struct file *pipe)
273{ 273{
274 if (!pipe->f_op || !pipe->f_op->write) 274 if (!pipe->f_op || !pipe->f_op->write)
275 return -EINVAL; 275 return -EINVAL;
276 if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode)) 276 if (!S_ISFIFO(file_inode(pipe)->i_mode))
277 return -EINVAL; 277 return -EINVAL;
278 /* We want a packet pipe */ 278 /* We want a packet pipe */
279 pipe->f_flags |= O_DIRECT; 279 pipe->f_flags |= O_DIRECT;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9f68a37bb2b2..743c7c2c949d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -159,7 +159,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (f) { 161 if (f) {
162 inode = f->f_path.dentry->d_inode; 162 inode = file_inode(f);
163 sbi = autofs4_sbi(inode->i_sb); 163 sbi = autofs4_sbi(inode->i_sb);
164 } 164 }
165 return sbi; 165 return sbi;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c93447604da8..230bd2aad4f4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -587,7 +587,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
587 587
588 /* This allows root to remove symlinks */ 588 /* This allows root to remove symlinks */
589 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) 589 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
590 return -EACCES; 590 return -EPERM;
591 591
592 if (atomic_dec_and_test(&ino->count)) { 592 if (atomic_dec_and_test(&ino->count)) {
593 p_ino = autofs4_dentry_ino(dentry->d_parent); 593 p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -874,7 +874,7 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
874static long autofs4_root_ioctl(struct file *filp, 874static long autofs4_root_ioctl(struct file *filp,
875 unsigned int cmd, unsigned long arg) 875 unsigned int cmd, unsigned long arg)
876{ 876{
877 struct inode *inode = filp->f_dentry->d_inode; 877 struct inode *inode = file_inode(filp);
878 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); 878 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
879} 879}
880 880
@@ -882,7 +882,7 @@ static long autofs4_root_ioctl(struct file *filp,
882static long autofs4_root_compat_ioctl(struct file *filp, 882static long autofs4_root_compat_ioctl(struct file *filp,
883 unsigned int cmd, unsigned long arg) 883 unsigned int cmd, unsigned long arg)
884{ 884{
885 struct inode *inode = filp->f_path.dentry->d_inode; 885 struct inode *inode = file_inode(filp);
886 int ret; 886 int ret;
887 887
888 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) 888 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig
index 7835d30f211f..edc5cc2aefad 100644
--- a/fs/befs/Kconfig
+++ b/fs/befs/Kconfig
@@ -1,6 +1,6 @@
1config BEFS_FS 1config BEFS_FS
2 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 2 tristate "BeOS file system (BeFS) support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 The BeOS File System (BeFS) is the native file system of Be, Inc's 6 The BeOS File System (BeFS) is the native file system of Be, Inc's
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2b3bda8d5e68..c8f4e25eb9e2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -213,7 +213,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
213static int 213static int
214befs_readdir(struct file *filp, void *dirent, filldir_t filldir) 214befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
215{ 215{
216 struct inode *inode = filp->f_path.dentry->d_inode; 216 struct inode *inode = file_inode(filp);
217 struct super_block *sb = inode->i_sb; 217 struct super_block *sb = inode->i_sb;
218 befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; 218 befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
219 befs_off_t value; 219 befs_off_t value;
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index c2336c62024f..3728a6479c64 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -1,6 +1,6 @@
1config BFS_FS 1config BFS_FS
2 tristate "BFS file system support (EXPERIMENTAL)" 2 tristate "BFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 Boot File System (BFS) is a file system used under SCO UnixWare to 5 Boot File System (BFS) is a file system used under SCO UnixWare to
6 allow the bootloader access to the kernel image and other important 6 allow the bootloader access to the kernel image and other important
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 2785ef91191a..3f422f6bb5ca 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -28,7 +28,7 @@ static struct buffer_head *bfs_find_entry(struct inode *dir,
28 28
29static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) 29static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
30{ 30{
31 struct inode *dir = f->f_path.dentry->d_inode; 31 struct inode *dir = file_inode(f);
32 struct buffer_head *bh; 32 struct buffer_head *bh;
33 struct bfs_dirent *de; 33 struct bfs_dirent *de;
34 struct bfs_sb_info *info = BFS_SB(dir->i_sb); 34 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 6043567b95c2..bbc8f8827eac 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -214,7 +214,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
214 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && 214 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
215 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || 215 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
216 N_TRSIZE(ex) || N_DRSIZE(ex) || 216 N_TRSIZE(ex) || N_DRSIZE(ex) ||
217 i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 217 i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
218 return -ENOEXEC; 218 return -ENOEXEC;
219 } 219 }
220 220
@@ -367,7 +367,7 @@ static int load_aout_library(struct file *file)
367 int retval; 367 int retval;
368 struct exec ex; 368 struct exec ex;
369 369
370 inode = file->f_path.dentry->d_inode; 370 inode = file_inode(file);
371 371
372 retval = -ENOEXEC; 372 retval = -ENOEXEC;
373 error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); 373 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..a5702d74d2bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1140,7 +1141,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1140 1141
1141 /* By default, dump shared memory if mapped from an anonymous file. */ 1142 /* By default, dump shared memory if mapped from an anonymous file. */
1142 if (vma->vm_flags & VM_SHARED) { 1143 if (vma->vm_flags & VM_SHARED) {
1143 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? 1144 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1144 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1145 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1145 goto whole; 1146 goto whole;
1146 return 0; 1147 return 0;
@@ -1248,7 +1249,7 @@ static int writenote(struct memelfnote *men, struct file *file,
1248#undef DUMP_WRITE 1249#undef DUMP_WRITE
1249 1250
1250static void fill_elf_header(struct elfhdr *elf, int segs, 1251static void fill_elf_header(struct elfhdr *elf, int segs,
1251 u16 machine, u32 flags, u8 osabi) 1252 u16 machine, u32 flags)
1252{ 1253{
1253 memset(elf, 0, sizeof(*elf)); 1254 memset(elf, 0, sizeof(*elf));
1254 1255
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
@@ -1630,7 +1634,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1630 * Initialize the ELF file header. 1634 * Initialize the ELF file header.
1631 */ 1635 */
1632 fill_elf_header(elf, phdrs, 1636 fill_elf_header(elf, phdrs,
1633 view->e_machine, view->e_flags, view->ei_osabi); 1637 view->e_machine, view->e_flags);
1634 1638
1635 /* 1639 /*
1636 * Allocate a structure for each thread. 1640 * Allocate a structure for each thread.
@@ -1870,7 +1874,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1870 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1874 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871 1875
1872 /* Set up header */ 1876 /* Set up header */
1873 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); 1877 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874 1878
1875 /* 1879 /*
1876 * Set up the notes in similar form to SVR4 core dumps made 1880 * Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..9c13e023e2b7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -909,7 +909,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
909 909
910dynamic_error: 910dynamic_error:
911 printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", 911 printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
912 what, file->f_path.dentry->d_inode->i_ino); 912 what, file_inode(file)->i_ino);
913 return -ELIBBAD; 913 return -ELIBBAD;
914} 914}
915 915
@@ -1219,7 +1219,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
1219 1219
1220 /* By default, dump shared memory if mapped from an anonymous file. */ 1220 /* By default, dump shared memory if mapped from an anonymous file. */
1221 if (vma->vm_flags & VM_SHARED) { 1221 if (vma->vm_flags & VM_SHARED) {
1222 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) { 1222 if (file_inode(vma->vm_file)->i_nlink == 0) {
1223 dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags); 1223 dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1224 kdcore("%08lx: %08lx: %s (share)", vma->vm_start, 1224 kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
1225 vma->vm_flags, dump_ok ? "yes" : "no"); 1225 vma->vm_flags, dump_ok ? "yes" : "no");
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b56371981d16..2036d21baaef 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -438,7 +438,7 @@ static int load_flat_file(struct linux_binprm * bprm,
438 int ret; 438 int ret;
439 439
440 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ 440 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */
441 inode = bprm->file->f_path.dentry->d_inode; 441 inode = file_inode(bprm->file);
442 442
443 text_len = ntohl(hdr->data_start); 443 text_len = ntohl(hdr->data_start);
444 data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start); 444 data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 0c8869fdd14e..fecbbf3f8ff2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -531,7 +531,7 @@ static void kill_node(Node *e)
531static ssize_t 531static ssize_t
532bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 532bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
533{ 533{
534 Node *e = file->f_path.dentry->d_inode->i_private; 534 Node *e = file_inode(file)->i_private;
535 ssize_t res; 535 ssize_t res;
536 char *page; 536 char *page;
537 537
@@ -550,7 +550,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
550 size_t count, loff_t *ppos) 550 size_t count, loff_t *ppos)
551{ 551{
552 struct dentry *root; 552 struct dentry *root;
553 Node *e = file->f_path.dentry->d_inode->i_private; 553 Node *e = file_inode(file)->i_private;
554 int res = parse_command(buffer, count); 554 int res = parse_command(buffer, count);
555 555
556 switch (res) { 556 switch (res) {
diff --git a/fs/bio.c b/fs/bio.c
index b96fc6ce4855..bb5768f59b32 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error)
1428 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1428 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1429 error = -EIO; 1429 error = -EIO;
1430 1430
1431 trace_block_bio_complete(bio, error);
1432
1431 if (bio->bi_end_io) 1433 if (bio->bi_end_io)
1432 bio->bi_end_io(bio, error); 1434 bio->bi_end_io(bio, error);
1433} 1435}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..aea605c98ba6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -318,7 +318,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
318 318
319/* 319/*
320 * private llseek: 320 * private llseek:
321 * for a block special file file->f_path.dentry->d_inode->i_size is zero 321 * for a block special file file_inode(file)->i_size is zero
322 * so we compute the size by hand (just as in block_read/write above) 322 * so we compute the size by hand (just as in block_read/write above)
323 */ 323 */
324static loff_t block_llseek(struct file *file, loff_t offset, int whence) 324static loff_t block_llseek(struct file *file, loff_t offset, int whence)
@@ -994,6 +994,7 @@ int revalidate_disk(struct gendisk *disk)
994 994
995 mutex_lock(&bdev->bd_mutex); 995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev); 996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
997 mutex_unlock(&bdev->bd_mutex); 998 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev); 999 bdput(bdev);
999 return ret; 1000 return ret;
@@ -1032,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
1032{ 1033{
1033 unsigned bsize = bdev_logical_block_size(bdev); 1034 unsigned bsize = bdev_logical_block_size(bdev);
1034 1035
1035 bdev->bd_inode->i_size = size; 1036 mutex_lock(&bdev->bd_inode->i_mutex);
1037 i_size_write(bdev->bd_inode, size);
1038 mutex_unlock(&bdev->bd_inode->i_mutex);
1036 while (bsize < PAGE_CACHE_SIZE) { 1039 while (bsize < PAGE_CACHE_SIZE) {
1037 if (size & bsize) 1040 if (size & bsize)
1038 break; 1041 break;
@@ -1117,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1117 } 1120 }
1118 } 1121 }
1119 1122
1120 if (!ret && !bdev->bd_openers) { 1123 if (!ret) {
1121 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1124 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1122 bdi = blk_get_backing_dev_info(bdev); 1125 bdi = blk_get_backing_dev_info(bdev);
1123 if (bdi == NULL) 1126 if (bdi == NULL)
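bd_set_size() now updates the block device inode size with i_size_write() under i_mutex rather than assigning i_size directly; i_size_write() is assumed to require caller-side serialization (on 32-bit SMP it updates a seqcount that concurrent writers would corrupt), which is presumably why the mutex is taken here. Sketch of the pattern:

	mutex_lock(&bdev->bd_inode->i_mutex);	/* serialize writers of i_size */
	i_size_write(bdev->bd_inode, size);	/* seqcount-protected update of the 64-bit size */
	mutex_unlock(&bdev->bd_inode->i_mutex);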
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index d33f01c08b60..ccd25ba7a9ac 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" 2 tristate "Btrfs filesystem Unstable disk format"
3 depends on EXPERIMENTAL
4 select LIBCRC32C 3 select LIBCRC32C
5 select ZLIB_INFLATE 4 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 614f34a899c2..81ee29eeb7ca 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -22,10 +22,10 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
22 22
23 if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { 23 if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
24 *max_len = BTRFS_FID_SIZE_CONNECTABLE; 24 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
25 return 255; 25 return FILEID_INVALID;
26 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { 26 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
27 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; 27 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
28 return 255; 28 return FILEID_INVALID;
29 } 29 }
30 30
31 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 31 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..cf54bdfee334 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root,
3689 return 0; 3689 return 0;
3690} 3690}
3691 3691
3692static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
3693 unsigned long nr_pages,
3694 enum wb_reason reason)
3695{
3696 if (!writeback_in_progress(sb->s_bdi) &&
3697 down_read_trylock(&sb->s_umount)) {
3698 writeback_inodes_sb_nr(sb, nr_pages, reason);
3699 up_read(&sb->s_umount);
3700 return 1;
3701 }
3702
3703 return 0;
3704}
3705
3706/* 3692/*
3707 * shrink metadata reservation for delalloc 3693 * shrink metadata reservation for delalloc
3708 */ 3694 */
@@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
3735 while (delalloc_bytes && loops < 3) { 3721 while (delalloc_bytes && loops < 3) {
3736 max_reclaim = min(delalloc_bytes, to_reclaim); 3722 max_reclaim = min(delalloc_bytes, to_reclaim);
3737 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; 3723 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
3738 writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, 3724 try_to_writeback_inodes_sb_nr(root->fs_info->sb,
3739 nr_pages, 3725 nr_pages,
3740 WB_REASON_FS_FREE_SPACE); 3726 WB_REASON_FS_FREE_SPACE);
3741 3727
3742 /* 3728 /*
3743 * We need to wait for the async pages to actually start before 3729 * We need to wait for the async pages to actually start before
@@ -3997,7 +3983,7 @@ again:
3997 * We make the other tasks wait for the flush only when we can flush 3983 * We make the other tasks wait for the flush only when we can flush
3998 * all things. 3984 * all things.
3999 */ 3985 */
4000 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { 3986 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4001 flushing = true; 3987 flushing = true;
4002 space_info->flush = 1; 3988 space_info->flush = 1;
4003 } 3989 }
@@ -4534,7 +4520,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4534 unsigned nr_extents = 0; 4520 unsigned nr_extents = 0;
4535 int extra_reserve = 0; 4521 int extra_reserve = 0;
4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 4522 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
4537 int ret; 4523 int ret = 0;
4538 bool delalloc_lock = true; 4524 bool delalloc_lock = true;
4539 4525
4540 /* If we are a free space inode we need to not flush since we will be in 4526 /* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4565,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4579 csum_bytes = BTRFS_I(inode)->csum_bytes; 4565 csum_bytes = BTRFS_I(inode)->csum_bytes;
4580 spin_unlock(&BTRFS_I(inode)->lock); 4566 spin_unlock(&BTRFS_I(inode)->lock);
4581 4567
4582 if (root->fs_info->quota_enabled) { 4568 if (root->fs_info->quota_enabled)
4583 ret = btrfs_qgroup_reserve(root, num_bytes + 4569 ret = btrfs_qgroup_reserve(root, num_bytes +
4584 nr_extents * root->leafsize); 4570 nr_extents * root->leafsize);
4585 if (ret) {
4586 spin_lock(&BTRFS_I(inode)->lock);
4587 calc_csum_metadata_size(inode, num_bytes, 0);
4588 spin_unlock(&BTRFS_I(inode)->lock);
4589 if (delalloc_lock)
4590 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4591 return ret;
4592 }
4593 }
4594 4571
4595 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4572 /*
4573 * ret != 0 here means the qgroup reservation failed, we go straight to
4574 * the shared error handling then.
4575 */
4576 if (ret == 0)
4577 ret = reserve_metadata_bytes(root, block_rsv,
4578 to_reserve, flush);
4579
4596 if (ret) { 4580 if (ret) {
4597 u64 to_free = 0; 4581 u64 to_free = 0;
4598 unsigned dropped; 4582 unsigned dropped;
@@ -5560,7 +5544,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5560 int empty_cluster = 2 * 1024 * 1024; 5544 int empty_cluster = 2 * 1024 * 1024;
5561 struct btrfs_space_info *space_info; 5545 struct btrfs_space_info *space_info;
5562 int loop = 0; 5546 int loop = 0;
5563 int index = 0; 5547 int index = __get_raid_index(data);
5564 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5548 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5565 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5549 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5566 bool found_uncached_bg = false; 5550 bool found_uncached_bg = false;
@@ -6524,7 +6508,7 @@ reada:
6524} 6508}
6525 6509
6526/* 6510/*
6527 * hepler to process tree block while walking down the tree. 6511 * helper to process tree block while walking down the tree.
6528 * 6512 *
6529 * when wc->stage == UPDATE_BACKREF, this function updates 6513 * when wc->stage == UPDATE_BACKREF, this function updates
6530 * back refs for pointers in the block. 6514 * back refs for pointers in the block.
@@ -6599,7 +6583,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6599} 6583}
6600 6584
6601/* 6585/*
6602 * hepler to process tree block pointer. 6586 * helper to process tree block pointer.
6603 * 6587 *
6604 * when wc->stage == DROP_REFERENCE, this function checks 6588 * when wc->stage == DROP_REFERENCE, this function checks
6605 * reference count of the block pointed to. if the block 6589 * reference count of the block pointed to. if the block
@@ -6737,7 +6721,7 @@ skip:
6737} 6721}
6738 6722
6739/* 6723/*
6740 * hepler to process tree block while walking up the tree. 6724 * helper to process tree block while walking up the tree.
6741 * 6725 *
6742 * when wc->stage == DROP_REFERENCE, this function drops 6726 * when wc->stage == DROP_REFERENCE, this function drops
6743 * reference count on the block. 6727 * reference count on the block.
@@ -6788,11 +6772,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6788 &wc->flags[level]); 6772 &wc->flags[level]);
6789 if (ret < 0) { 6773 if (ret < 0) {
6790 btrfs_tree_unlock_rw(eb, path->locks[level]); 6774 btrfs_tree_unlock_rw(eb, path->locks[level]);
6775 path->locks[level] = 0;
6791 return ret; 6776 return ret;
6792 } 6777 }
6793 BUG_ON(wc->refs[level] == 0); 6778 BUG_ON(wc->refs[level] == 0);
6794 if (wc->refs[level] == 1) { 6779 if (wc->refs[level] == 1) {
6795 btrfs_tree_unlock_rw(eb, path->locks[level]); 6780 btrfs_tree_unlock_rw(eb, path->locks[level]);
6781 path->locks[level] = 0;
6796 return 1; 6782 return 1;
6797 } 6783 }
6798 } 6784 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7f..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
172 return 0; 172 return 0;
173 173
174 if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
175 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
176 return 0;
177
174 if (extent_map_end(prev) == next->start && 178 if (extent_map_end(prev) == next->start &&
175 prev->flags == next->flags && 179 prev->flags == next->flags &&
176 prev->bdev == next->bdev && 180 prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
255 if (!em) 259 if (!em)
256 goto out; 260 goto out;
257 261
258 list_move(&em->list, &tree->modified_extents); 262 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
263 list_move(&em->list, &tree->modified_extents);
259 em->generation = gen; 264 em->generation = gen;
260 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 265 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
261 em->mod_start = em->start; 266 em->mod_start = em->start;
@@ -280,6 +285,13 @@ out:
280 285
281} 286}
282 287
288void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
289{
290 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
291 if (em->in_tree)
292 try_merge_map(tree, em);
293}
294
283/** 295/**
284 * add_extent_mapping - add new extent map to the extent tree 296 * add_extent_mapping - add new extent map to the extent tree
285 * @tree: tree to insert new map in 297 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e8..c6598c89cff8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
69int __init extent_map_init(void); 69int __init extent_map_init(void);
70void extent_map_exit(void); 70void extent_map_exit(void);
71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); 71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
72void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
72struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 73struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
73 u64 start, u64 len); 74 u64 start, u64 len);
74#endif 75#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef42358..94aa53b38721 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 if (!contig) 460 if (!contig)
461 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 461 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
462 462
463 if (!contig && (offset >= ordered->file_offset + ordered->len || 463 if (offset >= ordered->file_offset + ordered->len ||
464 offset < ordered->file_offset)) { 464 offset < ordered->file_offset) {
465 unsigned long bytes_left; 465 unsigned long bytes_left;
466 sums->len = this_sum_bytes; 466 sums->len = this_sum_bytes;
467 this_sum_bytes = 0; 467 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..4b241fe9d2fe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*
@@ -1211,7 +1225,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1211 struct extent_state *cached_state = NULL; 1225 struct extent_state *cached_state = NULL;
1212 int i; 1226 int i;
1213 unsigned long index = pos >> PAGE_CACHE_SHIFT; 1227 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1214 struct inode *inode = fdentry(file)->d_inode; 1228 struct inode *inode = file_inode(file);
1215 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1229 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1216 int err = 0; 1230 int err = 0;
1217 int faili = 0; 1231 int faili = 0;
@@ -1298,7 +1312,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1298 struct iov_iter *i, 1312 struct iov_iter *i,
1299 loff_t pos) 1313 loff_t pos)
1300{ 1314{
1301 struct inode *inode = fdentry(file)->d_inode; 1315 struct inode *inode = file_inode(file);
1302 struct btrfs_root *root = BTRFS_I(inode)->root; 1316 struct btrfs_root *root = BTRFS_I(inode)->root;
1303 struct page **pages = NULL; 1317 struct page **pages = NULL;
1304 unsigned long first_index; 1318 unsigned long first_index;
@@ -1486,7 +1500,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1486 unsigned long nr_segs, loff_t pos) 1500 unsigned long nr_segs, loff_t pos)
1487{ 1501{
1488 struct file *file = iocb->ki_filp; 1502 struct file *file = iocb->ki_filp;
1489 struct inode *inode = fdentry(file)->d_inode; 1503 struct inode *inode = file_inode(file);
1490 struct btrfs_root *root = BTRFS_I(inode)->root; 1504 struct btrfs_root *root = BTRFS_I(inode)->root;
1491 loff_t *ppos = &iocb->ki_pos; 1505 loff_t *ppos = &iocb->ki_pos;
1492 u64 start_pos; 1506 u64 start_pos;
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1594 if (err < 0 && num_written > 0) 1608 if (err < 0 && num_written > 0)
1595 num_written = err; 1609 num_written = err;
1596 } 1610 }
1597out: 1611
1598 if (sync) 1612 if (sync)
1599 atomic_dec(&BTRFS_I(inode)->sync_writers); 1613 atomic_dec(&BTRFS_I(inode)->sync_writers);
1614out:
1600 sb_end_write(inode->i_sb); 1615 sb_end_write(inode->i_sb);
1601 current->backing_dev_info = NULL; 1616 current->backing_dev_info = NULL;
1602 return num_written ? num_written : err; 1617 return num_written ? num_written : err;
@@ -2087,7 +2102,7 @@ out:
2087static long btrfs_fallocate(struct file *file, int mode, 2102static long btrfs_fallocate(struct file *file, int mode,
2088 loff_t offset, loff_t len) 2103 loff_t offset, loff_t len)
2089{ 2104{
2090 struct inode *inode = file->f_path.dentry->d_inode; 2105 struct inode *inode = file_inode(file);
2091 struct extent_state *cached_state = NULL; 2106 struct extent_state *cached_state = NULL;
2092 u64 cur_offset; 2107 u64 cur_offset;
2093 u64 last_byte; 2108 u64 last_byte;
@@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2241 if (lockend <= lockstart) 2256 if (lockend <= lockstart)
2242 lockend = lockstart + root->sectorsize; 2257 lockend = lockstart + root->sectorsize;
2243 2258
2259 lockend--;
2244 len = lockend - lockstart + 1; 2260 len = lockend - lockstart + 1;
2245 2261
2246 len = max_t(u64, len, root->sectorsize); 2262 len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2307 } 2323 }
2308 } 2324 }
2309 2325
2310 *offset = start; 2326 if (!test_bit(EXTENT_FLAG_PREALLOC,
2311 free_extent_map(em); 2327 &em->flags)) {
2312 break; 2328 *offset = start;
2329 free_extent_map(em);
2330 break;
2331 }
2313 } 2332 }
2314 } 2333 }
2315 2334
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c9..0be7a8742a43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1862{ 1862{
1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1864 struct btrfs_free_space *info; 1864 struct btrfs_free_space *info;
1865 int ret = 0; 1865 int ret;
1866 bool re_search = false;
1866 1867
1867 spin_lock(&ctl->tree_lock); 1868 spin_lock(&ctl->tree_lock);
1868 1869
1869again: 1870again:
1871 ret = 0;
1870 if (!bytes) 1872 if (!bytes)
1871 goto out_lock; 1873 goto out_lock;
1872 1874
@@ -1879,17 +1881,17 @@ again:
1879 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1881 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1880 1, 0); 1882 1, 0);
1881 if (!info) { 1883 if (!info) {
1882 /* the tree logging code might be calling us before we 1884 /*
1883 * have fully loaded the free space rbtree for this 1885 * If we found a partial bit of our free space in a
1884 * block group. So it is possible the entry won't 1886 * bitmap but then couldn't find the other part this may
1885 * be in the rbtree yet at all. The caching code 1887 * be a problem, so WARN about it.
1886 * will make sure not to put it in the rbtree if
1887 * the logging code has pinned it.
1888 */ 1888 */
1889 WARN_ON(re_search);
1889 goto out_lock; 1890 goto out_lock;
1890 } 1891 }
1891 } 1892 }
1892 1893
1894 re_search = false;
1893 if (!info->bitmap) { 1895 if (!info->bitmap) {
1894 unlink_free_space(ctl, info); 1896 unlink_free_space(ctl, info);
1895 if (offset == info->offset) { 1897 if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
1935 } 1937 }
1936 1938
1937 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1939 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1938 if (ret == -EAGAIN) 1940 if (ret == -EAGAIN) {
1941 re_search = true;
1939 goto again; 1942 goto again;
1943 }
1940 BUG_ON(ret); /* logic error */ 1944 BUG_ON(ret); /* logic error */
1941out_lock: 1945out_lock:
1942 spin_unlock(&ctl->tree_lock); 1946 spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..55c07b650378 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
89}; 89};
90 90
91static int btrfs_setsize(struct inode *inode, loff_t newsize); 91static int btrfs_setsize(struct inode *inode, struct iattr *attr);
92static int btrfs_truncate(struct inode *inode); 92static int btrfs_truncate(struct inode *inode);
93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
94static noinline int cow_file_range(struct inode *inode, 94static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2478 continue; 2478 continue;
2479 } 2479 }
2480 nr_truncate++; 2480 nr_truncate++;
2481
2482 /* 1 for the orphan item deletion. */
2483 trans = btrfs_start_transaction(root, 1);
2484 if (IS_ERR(trans)) {
2485 ret = PTR_ERR(trans);
2486 goto out;
2487 }
2488 ret = btrfs_orphan_add(trans, inode);
2489 btrfs_end_transaction(trans, root);
2490 if (ret)
2491 goto out;
2492
2481 ret = btrfs_truncate(inode); 2493 ret = btrfs_truncate(inode);
2482 } else { 2494 } else {
2483 nr_unlink++; 2495 nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3665 block_end - cur_offset, 0); 3677 block_end - cur_offset, 0);
3666 if (IS_ERR(em)) { 3678 if (IS_ERR(em)) {
3667 err = PTR_ERR(em); 3679 err = PTR_ERR(em);
3680 em = NULL;
3668 break; 3681 break;
3669 } 3682 }
3670 last_byte = min(extent_map_end(em), block_end); 3683 last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
3748 return err; 3761 return err;
3749} 3762}
3750 3763
3751static int btrfs_setsize(struct inode *inode, loff_t newsize) 3764static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3752{ 3765{
3753 struct btrfs_root *root = BTRFS_I(inode)->root; 3766 struct btrfs_root *root = BTRFS_I(inode)->root;
3754 struct btrfs_trans_handle *trans; 3767 struct btrfs_trans_handle *trans;
3755 loff_t oldsize = i_size_read(inode); 3768 loff_t oldsize = i_size_read(inode);
3769 loff_t newsize = attr->ia_size;
3770 int mask = attr->ia_valid;
3756 int ret; 3771 int ret;
3757 3772
3758 if (newsize == oldsize) 3773 if (newsize == oldsize)
3759 return 0; 3774 return 0;
3760 3775
3776 /*
3777 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
3778 * special case where we need to update the times despite not having
3779 * these flags set. For all other operations the VFS set these flags
3780 * explicitly if it wants a timestamp update.
3781 */
3782 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
3783 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
3784
3761 if (newsize > oldsize) { 3785 if (newsize > oldsize) {
3762 truncate_pagecache(inode, oldsize, newsize); 3786 truncate_pagecache(inode, oldsize, newsize);
3763 ret = btrfs_cont_expand(inode, oldsize, newsize); 3787 ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3783 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 3807 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3784 &BTRFS_I(inode)->runtime_flags); 3808 &BTRFS_I(inode)->runtime_flags);
3785 3809
3810 /*
3811 * 1 for the orphan item we're going to add
3812 * 1 for the orphan item deletion.
3813 */
3814 trans = btrfs_start_transaction(root, 2);
3815 if (IS_ERR(trans))
3816 return PTR_ERR(trans);
3817
3818 /*
3819 * We need to do this in case we fail at _any_ point during the
3820 * actual truncate. Once we do the truncate_setsize we could
3821 * invalidate pages which forces any outstanding ordered io to
3822 * be instantly completed which will give us extents that need
3823 * to be truncated. If we fail to get an orphan inode down we
3824 * could have left over extents that were never meant to live,
 3825 * so we need to guarantee from this point on that everything
3826 * will be consistent.
3827 */
3828 ret = btrfs_orphan_add(trans, inode);
3829 btrfs_end_transaction(trans, root);
3830 if (ret)
3831 return ret;
3832
3786 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3787 truncate_setsize(inode, newsize); 3834 truncate_setsize(inode, newsize);
3788 ret = btrfs_truncate(inode); 3835 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode);
3789 } 3838 }
3790 3839
3791 return ret; 3840 return ret;
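Condensed, the shrinking path now does the orphan bookkeeping before any pagecache is thrown away. A minimal sketch of that ordering, using only the helpers visible in the hunk above (the function name is made up for the illustration):

        static int btrfs_shrink_with_orphan(struct inode *inode, loff_t newsize)
        {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                struct btrfs_trans_handle *trans;
                int ret;

                /* 1 for the orphan item we're going to add, 1 for its later deletion */
                trans = btrfs_start_transaction(root, 2);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);

                /* pin the inode on the orphan list before any pagecache is invalidated */
                ret = btrfs_orphan_add(trans, inode);
                btrfs_end_transaction(trans, root);
                if (ret)
                        return ret;

                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
                if (ret && inode->i_nlink)
                        btrfs_orphan_del(NULL, inode);  /* truncate failed: drop the in-memory orphan */
                return ret;
        }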
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3805 return err; 3854 return err;
3806 3855
3807 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3856 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3808 err = btrfs_setsize(inode, attr->ia_size); 3857 err = btrfs_setsize(inode, attr);
3809 if (err) 3858 if (err)
3810 return err; 3859 return err;
3811 } 3860 }
@@ -4342,7 +4391,7 @@ unsigned char btrfs_filetype_table[] = {
4342static int btrfs_real_readdir(struct file *filp, void *dirent, 4391static int btrfs_real_readdir(struct file *filp, void *dirent,
4343 filldir_t filldir) 4392 filldir_t filldir)
4344{ 4393{
4345 struct inode *inode = filp->f_dentry->d_inode; 4394 struct inode *inode = file_inode(filp);
4346 struct btrfs_root *root = BTRFS_I(inode)->root; 4395 struct btrfs_root *root = BTRFS_I(inode)->root;
4347 struct btrfs_item *item; 4396 struct btrfs_item *item;
4348 struct btrfs_dir_item *di; 4397 struct btrfs_dir_item *di;
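This hunk is part of a conversion that runs through the rest of the series: the open-coded ways of reaching a file's inode are replaced by the file_inode() accessor, a small helper that returns the inode backing a struct file. Before/after, taken from hunks in this diff:

        /* before */
        struct inode *inode = filp->f_dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;

        /* after */
        struct inode *inode = file_inode(filp);
        struct btrfs_root *root = BTRFS_I(file_inode(file))->root;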
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5572 return em; 5621 return em;
5573 if (em) { 5622 if (em) {
5574 /* 5623 /*
5575 * if our em maps to a hole, there might 5624 * if our em maps to
5576 * actually be delalloc bytes behind it 5625 * - a hole or
5626 * - a pre-alloc extent,
5627 * there might actually be delalloc bytes behind it.
5577 */ 5628 */
5578 if (em->block_start != EXTENT_MAP_HOLE) 5629 if (em->block_start != EXTENT_MAP_HOLE &&
5630 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5579 return em; 5631 return em;
5580 else 5632 else
5581 hole_em = em; 5633 hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5657 */ 5709 */
5658 em->block_start = hole_em->block_start; 5710 em->block_start = hole_em->block_start;
5659 em->block_len = hole_len; 5711 em->block_len = hole_len;
5712 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
5713 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
5660 } else { 5714 } else {
5661 em->start = range_start; 5715 em->start = range_start;
5662 em->len = found; 5716 em->len = found;
@@ -6737,7 +6791,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6737int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 6791int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6738{ 6792{
6739 struct page *page = vmf->page; 6793 struct page *page = vmf->page;
6740 struct inode *inode = fdentry(vma->vm_file)->d_inode; 6794 struct inode *inode = file_inode(vma->vm_file);
6741 struct btrfs_root *root = BTRFS_I(inode)->root; 6795 struct btrfs_root *root = BTRFS_I(inode)->root;
6742 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 6796 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6743 struct btrfs_ordered_extent *ordered; 6797 struct btrfs_ordered_extent *ordered;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
6915 6969
6916 /* 6970 /*
6917 * 1 for the truncate slack space 6971 * 1 for the truncate slack space
6918 * 1 for the orphan item we're going to add
6919 * 1 for the orphan item deletion
6920 * 1 for updating the inode. 6972 * 1 for updating the inode.
6921 */ 6973 */
6922 trans = btrfs_start_transaction(root, 4); 6974 trans = btrfs_start_transaction(root, 2);
6923 if (IS_ERR(trans)) { 6975 if (IS_ERR(trans)) {
6924 err = PTR_ERR(trans); 6976 err = PTR_ERR(trans);
6925 goto out; 6977 goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
6930 min_size); 6982 min_size);
6931 BUG_ON(ret); 6983 BUG_ON(ret);
6932 6984
6933 ret = btrfs_orphan_add(trans, inode);
6934 if (ret) {
6935 btrfs_end_transaction(trans, root);
6936 goto out;
6937 }
6938
6939 /* 6985 /*
6940 * setattr is responsible for setting the ordered_data_close flag, 6986 * setattr is responsible for setting the ordered_data_close flag,
6941 * but that is only tested during the last file release. That 6987 * but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
7004 ret = btrfs_orphan_del(trans, inode); 7050 ret = btrfs_orphan_del(trans, inode);
7005 if (ret) 7051 if (ret)
7006 err = ret; 7052 err = ret;
7007 } else if (ret && inode->i_nlink > 0) {
7008 /*
7009 * Failed to do the truncate, remove us from the in memory
7010 * orphan list.
7011 */
7012 ret = btrfs_orphan_del(NULL, inode);
7013 } 7053 }
7014 7054
7015 if (trans) { 7055 if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
7531 */ 7571 */
7532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 7572int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7533{ 7573{
7534 struct list_head *head = &root->fs_info->delalloc_inodes;
7535 struct btrfs_inode *binode; 7574 struct btrfs_inode *binode;
7536 struct inode *inode; 7575 struct inode *inode;
7537 struct btrfs_delalloc_work *work, *next; 7576 struct btrfs_delalloc_work *work, *next;
7538 struct list_head works; 7577 struct list_head works;
7578 struct list_head splice;
7539 int ret = 0; 7579 int ret = 0;
7540 7580
7541 if (root->fs_info->sb->s_flags & MS_RDONLY) 7581 if (root->fs_info->sb->s_flags & MS_RDONLY)
7542 return -EROFS; 7582 return -EROFS;
7543 7583
7544 INIT_LIST_HEAD(&works); 7584 INIT_LIST_HEAD(&works);
7545 7585 INIT_LIST_HEAD(&splice);
7586again:
7546 spin_lock(&root->fs_info->delalloc_lock); 7587 spin_lock(&root->fs_info->delalloc_lock);
7547 while (!list_empty(head)) { 7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7548 binode = list_entry(head->next, struct btrfs_inode, 7589 while (!list_empty(&splice)) {
7590 binode = list_entry(splice.next, struct btrfs_inode,
7549 delalloc_inodes); 7591 delalloc_inodes);
7592
7593 list_del_init(&binode->delalloc_inodes);
7594
7550 inode = igrab(&binode->vfs_inode); 7595 inode = igrab(&binode->vfs_inode);
7551 if (!inode) 7596 if (!inode)
7552 list_del_init(&binode->delalloc_inodes); 7597 continue;
7598
7599 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes);
7553 spin_unlock(&root->fs_info->delalloc_lock); 7601 spin_unlock(&root->fs_info->delalloc_lock);
7554 if (inode) { 7602
7555 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 7603 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
7556 if (!work) { 7604 if (unlikely(!work)) {
7557 ret = -ENOMEM; 7605 ret = -ENOMEM;
7558 goto out; 7606 goto out;
7559 }
7560 list_add_tail(&work->list, &works);
7561 btrfs_queue_worker(&root->fs_info->flush_workers,
7562 &work->work);
7563 } 7607 }
7608 list_add_tail(&work->list, &works);
7609 btrfs_queue_worker(&root->fs_info->flush_workers,
7610 &work->work);
7611
7564 cond_resched(); 7612 cond_resched();
7565 spin_lock(&root->fs_info->delalloc_lock); 7613 spin_lock(&root->fs_info->delalloc_lock);
7566 } 7614 }
7567 spin_unlock(&root->fs_info->delalloc_lock); 7615 spin_unlock(&root->fs_info->delalloc_lock);
7568 7616
7617 list_for_each_entry_safe(work, next, &works, list) {
7618 list_del_init(&work->list);
7619 btrfs_wait_and_free_delalloc_work(work);
7620 }
7621
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7569 /* the filemap_flush will queue IO into the worker threads, but 7629 /* the filemap_flush will queue IO into the worker threads, but
7570 * we have to make sure the IO is actually started and that 7630 * we have to make sure the IO is actually started and that
7571 * ordered extents get created before we return 7631 * ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7578 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 7638 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
7579 } 7639 }
7580 atomic_dec(&root->fs_info->async_submit_draining); 7640 atomic_dec(&root->fs_info->async_submit_draining);
7641 return 0;
7581out: 7642out:
7582 list_for_each_entry_safe(work, next, &works, list) { 7643 list_for_each_entry_safe(work, next, &works, list) {
7583 list_del_init(&work->list); 7644 list_del_init(&work->list);
7584 btrfs_wait_and_free_delalloc_work(work); 7645 btrfs_wait_and_free_delalloc_work(work);
7585 } 7646 }
7647
7648 if (!list_empty_careful(&splice)) {
7649 spin_lock(&root->fs_info->delalloc_lock);
7650 list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
7651 spin_unlock(&root->fs_info->delalloc_lock);
7652 }
7586 return ret; 7653 return ret;
7587} 7654}
7588 7655
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..c3f09f71bedd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -152,7 +152,7 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
152 152
153static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 153static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
154{ 154{
155 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); 155 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
156 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 156 unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
157 157
158 if (copy_to_user(arg, &flags, sizeof(flags))) 158 if (copy_to_user(arg, &flags, sizeof(flags)))
@@ -177,7 +177,7 @@ static int check_flags(unsigned int flags)
177 177
178static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 178static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
179{ 179{
180 struct inode *inode = file->f_path.dentry->d_inode; 180 struct inode *inode = file_inode(file);
181 struct btrfs_inode *ip = BTRFS_I(inode); 181 struct btrfs_inode *ip = BTRFS_I(inode);
182 struct btrfs_root *root = ip->root; 182 struct btrfs_root *root = ip->root;
183 struct btrfs_trans_handle *trans; 183 struct btrfs_trans_handle *trans;
@@ -310,7 +310,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
310 310
311static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 311static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
312{ 312{
313 struct inode *inode = file->f_path.dentry->d_inode; 313 struct inode *inode = file_inode(file);
314 314
315 return put_user(inode->i_generation, arg); 315 return put_user(inode->i_generation, arg);
316} 316}
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
515 515
516 BUG_ON(ret); 516 BUG_ON(ret);
517 517
518 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
519fail: 518fail:
520 if (async_transid) { 519 if (async_transid) {
521 *async_transid = trans->transid; 520 *async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
525 } 524 }
526 if (err && !ret) 525 if (err && !ret)
527 ret = err; 526 ret = err;
527
528 if (!ret)
529 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
530
528 return ret; 531 return ret;
529} 532}
530 533
@@ -1317,7 +1320,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1317 u64 new_size; 1320 u64 new_size;
1318 u64 old_size; 1321 u64 old_size;
1319 u64 devid = 1; 1322 u64 devid = 1;
1320 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 1323 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
1321 struct btrfs_ioctl_vol_args *vol_args; 1324 struct btrfs_ioctl_vol_args *vol_args;
1322 struct btrfs_trans_handle *trans; 1325 struct btrfs_trans_handle *trans;
1323 struct btrfs_device *device = NULL; 1326 struct btrfs_device *device = NULL;
@@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1339 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1342 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1340 1)) { 1343 1)) {
1341 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1344 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1342 return -EINPROGRESS; 1345 mnt_drop_write_file(file);
1346 return -EINVAL;
1343 } 1347 }
1344 1348
1345 mutex_lock(&root->fs_info->volume_mutex); 1349 mutex_lock(&root->fs_info->volume_mutex);
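Several ioctls in this file (resize, dev add/remove, defrag, balance) serialize against each other through fs_info->mutually_exclusive_operation_running. The idiom is a try-lock built from atomic_xchg(); this series also changes the busy return code from -EINPROGRESS to -EINVAL and makes sure mnt_want_write_file() is undone on that path. The bare pattern, using the calls from the hunks:

        if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
                /* some other add/delete/balance/replace/resize is already running */
                mnt_drop_write_file(file);
                return -EINVAL;
        }

        /* ... the exclusive device operation ... */

        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
        mnt_drop_write_file(file);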
@@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1362 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1366 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1363 (unsigned long long)devid); 1367 (unsigned long long)devid);
1364 } 1368 }
1369
1365 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1370 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1366 if (!device) { 1371 if (!device) {
1367 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1369 ret = -EINVAL; 1374 ret = -EINVAL;
1370 goto out_free; 1375 goto out_free;
1371 } 1376 }
1372 if (device->fs_devices && device->fs_devices->seeding) { 1377
1378 if (!device->writeable) {
1373 printk(KERN_INFO "btrfs: resizer unable to apply on " 1379 printk(KERN_INFO "btrfs: resizer unable to apply on "
1374 "seeding device %llu\n", 1380 "readonly device %llu\n",
1375 (unsigned long long)devid); 1381 (unsigned long long)devid);
1376 ret = -EINVAL; 1382 ret = -EINVAL;
1377 goto out_free; 1383 goto out_free;
@@ -1443,8 +1449,8 @@ out_free:
1443 kfree(vol_args); 1449 kfree(vol_args);
1444out: 1450out:
1445 mutex_unlock(&root->fs_info->volume_mutex); 1451 mutex_unlock(&root->fs_info->volume_mutex);
1446 mnt_drop_write_file(file);
1447 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1452 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
1453 mnt_drop_write_file(file);
1448 return ret; 1454 return ret;
1449} 1455}
1450 1456
@@ -1483,8 +1489,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1483 goto out_drop_write; 1489 goto out_drop_write;
1484 } 1490 }
1485 1491
1486 src_inode = src.file->f_path.dentry->d_inode; 1492 src_inode = file_inode(src.file);
1487 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1493 if (src_inode->i_sb != file_inode(file)->i_sb) {
1488 printk(KERN_INFO "btrfs: Snapshot src from " 1494 printk(KERN_INFO "btrfs: Snapshot src from "
1489 "another FS\n"); 1495 "another FS\n");
1490 ret = -EINVAL; 1496 ret = -EINVAL;
@@ -1576,7 +1582,7 @@ out:
1576static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1582static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1577 void __user *arg) 1583 void __user *arg)
1578{ 1584{
1579 struct inode *inode = fdentry(file)->d_inode; 1585 struct inode *inode = file_inode(file);
1580 struct btrfs_root *root = BTRFS_I(inode)->root; 1586 struct btrfs_root *root = BTRFS_I(inode)->root;
1581 int ret = 0; 1587 int ret = 0;
1582 u64 flags = 0; 1588 u64 flags = 0;
@@ -1598,7 +1604,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1598static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1604static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1599 void __user *arg) 1605 void __user *arg)
1600{ 1606{
1601 struct inode *inode = fdentry(file)->d_inode; 1607 struct inode *inode = file_inode(file);
1602 struct btrfs_root *root = BTRFS_I(inode)->root; 1608 struct btrfs_root *root = BTRFS_I(inode)->root;
1603 struct btrfs_trans_handle *trans; 1609 struct btrfs_trans_handle *trans;
1604 u64 root_flags; 1610 u64 root_flags;
@@ -1892,7 +1898,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
1892 if (IS_ERR(args)) 1898 if (IS_ERR(args))
1893 return PTR_ERR(args); 1899 return PTR_ERR(args);
1894 1900
1895 inode = fdentry(file)->d_inode; 1901 inode = file_inode(file);
1896 ret = search_ioctl(inode, args); 1902 ret = search_ioctl(inode, args);
1897 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1903 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1898 ret = -EFAULT; 1904 ret = -EFAULT;
@@ -2002,7 +2008,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
2002 if (IS_ERR(args)) 2008 if (IS_ERR(args))
2003 return PTR_ERR(args); 2009 return PTR_ERR(args);
2004 2010
2005 inode = fdentry(file)->d_inode; 2011 inode = file_inode(file);
2006 2012
2007 if (args->treeid == 0) 2013 if (args->treeid == 0)
2008 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2014 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
@@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2095 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2101 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2096 if (err) 2102 if (err)
2097 goto out_dput; 2103 goto out_dput;
2098
2099 /* check if subvolume may be deleted by a non-root user */
2100 err = btrfs_may_delete(dir, dentry, 1);
2101 if (err)
2102 goto out_dput;
2103 } 2104 }
2104 2105
2106 /* check if subvolume may be deleted by a user */
2107 err = btrfs_may_delete(dir, dentry, 1);
2108 if (err)
2109 goto out_dput;
2110
2105 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2111 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2106 err = -EINVAL; 2112 err = -EINVAL;
2107 goto out_dput; 2113 goto out_dput;
@@ -2178,24 +2184,25 @@ out:
2178 2184
2179static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2185static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2180{ 2186{
2181 struct inode *inode = fdentry(file)->d_inode; 2187 struct inode *inode = file_inode(file);
2182 struct btrfs_root *root = BTRFS_I(inode)->root; 2188 struct btrfs_root *root = BTRFS_I(inode)->root;
2183 struct btrfs_ioctl_defrag_range_args *range; 2189 struct btrfs_ioctl_defrag_range_args *range;
2184 int ret; 2190 int ret;
2185 2191
2186 if (btrfs_root_readonly(root)) 2192 ret = mnt_want_write_file(file);
2187 return -EROFS; 2193 if (ret)
2194 return ret;
2188 2195
2189 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2196 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2190 1)) { 2197 1)) {
2191 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2198 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2192 return -EINPROGRESS; 2199 mnt_drop_write_file(file);
2200 return -EINVAL;
2193 } 2201 }
2194 ret = mnt_want_write_file(file); 2202
2195 if (ret) { 2203 if (btrfs_root_readonly(root)) {
2196 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 2204 ret = -EROFS;
2197 0); 2205 goto out;
2198 return ret;
2199 } 2206 }
2200 2207
2201 switch (inode->i_mode & S_IFMT) { 2208 switch (inode->i_mode & S_IFMT) {
@@ -2237,7 +2244,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2237 /* the rest are all set to zero by kzalloc */ 2244 /* the rest are all set to zero by kzalloc */
2238 range->len = (u64)-1; 2245 range->len = (u64)-1;
2239 } 2246 }
2240 ret = btrfs_defrag_file(fdentry(file)->d_inode, file, 2247 ret = btrfs_defrag_file(file_inode(file), file,
2241 range, 0, 0); 2248 range, 0, 0);
2242 if (ret > 0) 2249 if (ret > 0)
2243 ret = 0; 2250 ret = 0;
@@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2247 ret = -EINVAL; 2254 ret = -EINVAL;
2248 } 2255 }
2249out: 2256out:
2250 mnt_drop_write_file(file);
2251 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2257 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2258 mnt_drop_write_file(file);
2252 return ret; 2259 return ret;
2253} 2260}
2254 2261
@@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2263 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2270 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2264 1)) { 2271 1)) {
2265 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2272 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2266 return -EINPROGRESS; 2273 return -EINVAL;
2267 } 2274 }
2268 2275
2269 mutex_lock(&root->fs_info->volume_mutex); 2276 mutex_lock(&root->fs_info->volume_mutex);
@@ -2285,7 +2292,7 @@ out:
2285 2292
2286static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2293static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2287{ 2294{
2288 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 2295 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
2289 struct btrfs_ioctl_vol_args *vol_args; 2296 struct btrfs_ioctl_vol_args *vol_args;
2290 int ret; 2297 int ret;
2291 2298
@@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2300 1)) { 2307 1)) {
2301 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2308 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2302 mnt_drop_write_file(file); 2309 mnt_drop_write_file(file);
2303 return -EINPROGRESS; 2310 return -EINVAL;
2304 } 2311 }
2305 2312
2306 mutex_lock(&root->fs_info->volume_mutex); 2313 mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2316 kfree(vol_args); 2323 kfree(vol_args);
2317out: 2324out:
2318 mutex_unlock(&root->fs_info->volume_mutex); 2325 mutex_unlock(&root->fs_info->volume_mutex);
2319 mnt_drop_write_file(file);
2320 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2326 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2327 mnt_drop_write_file(file);
2321 return ret; 2328 return ret;
2322} 2329}
2323 2330
@@ -2408,7 +2415,7 @@ out:
2408static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2415static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2409 u64 off, u64 olen, u64 destoff) 2416 u64 off, u64 olen, u64 destoff)
2410{ 2417{
2411 struct inode *inode = fdentry(file)->d_inode; 2418 struct inode *inode = file_inode(file);
2412 struct btrfs_root *root = BTRFS_I(inode)->root; 2419 struct btrfs_root *root = BTRFS_I(inode)->root;
2413 struct fd src_file; 2420 struct fd src_file;
2414 struct inode *src; 2421 struct inode *src;
@@ -2454,7 +2461,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2454 if (src_file.file->f_path.mnt != file->f_path.mnt) 2461 if (src_file.file->f_path.mnt != file->f_path.mnt)
2455 goto out_fput; 2462 goto out_fput;
2456 2463
2457 src = src_file.file->f_dentry->d_inode; 2464 src = file_inode(src_file.file);
2458 2465
2459 ret = -EINVAL; 2466 ret = -EINVAL;
2460 if (src == inode) 2467 if (src == inode)
@@ -2816,7 +2823,7 @@ static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
2816 */ 2823 */
2817static long btrfs_ioctl_trans_start(struct file *file) 2824static long btrfs_ioctl_trans_start(struct file *file)
2818{ 2825{
2819 struct inode *inode = fdentry(file)->d_inode; 2826 struct inode *inode = file_inode(file);
2820 struct btrfs_root *root = BTRFS_I(inode)->root; 2827 struct btrfs_root *root = BTRFS_I(inode)->root;
2821 struct btrfs_trans_handle *trans; 2828 struct btrfs_trans_handle *trans;
2822 int ret; 2829 int ret;
@@ -2856,7 +2863,7 @@ out:
2856 2863
2857static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 2864static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2858{ 2865{
2859 struct inode *inode = fdentry(file)->d_inode; 2866 struct inode *inode = file_inode(file);
2860 struct btrfs_root *root = BTRFS_I(inode)->root; 2867 struct btrfs_root *root = BTRFS_I(inode)->root;
2861 struct btrfs_root *new_root; 2868 struct btrfs_root *new_root;
2862 struct btrfs_dir_item *di; 2869 struct btrfs_dir_item *di;
@@ -3080,7 +3087,7 @@ out:
3080 */ 3087 */
3081long btrfs_ioctl_trans_end(struct file *file) 3088long btrfs_ioctl_trans_end(struct file *file)
3082{ 3089{
3083 struct inode *inode = fdentry(file)->d_inode; 3090 struct inode *inode = file_inode(file);
3084 struct btrfs_root *root = BTRFS_I(inode)->root; 3091 struct btrfs_root *root = BTRFS_I(inode)->root;
3085 struct btrfs_trans_handle *trans; 3092 struct btrfs_trans_handle *trans;
3086 3093
@@ -3142,7 +3149,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
3142 3149
3143static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 3150static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
3144{ 3151{
3145 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3152 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3146 struct btrfs_ioctl_scrub_args *sa; 3153 struct btrfs_ioctl_scrub_args *sa;
3147 int ret; 3154 int ret;
3148 3155
@@ -3433,12 +3440,12 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3433 3440
3434static long btrfs_ioctl_balance(struct file *file, void __user *arg) 3441static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3435{ 3442{
3436 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3443 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3437 struct btrfs_fs_info *fs_info = root->fs_info; 3444 struct btrfs_fs_info *fs_info = root->fs_info;
3438 struct btrfs_ioctl_balance_args *bargs; 3445 struct btrfs_ioctl_balance_args *bargs;
3439 struct btrfs_balance_control *bctl; 3446 struct btrfs_balance_control *bctl;
3447 bool need_unlock; /* for mut. excl. ops lock */
3440 int ret; 3448 int ret;
3441 int need_to_clear_lock = 0;
3442 3449
3443 if (!capable(CAP_SYS_ADMIN)) 3450 if (!capable(CAP_SYS_ADMIN))
3444 return -EPERM; 3451 return -EPERM;
@@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3447 if (ret) 3454 if (ret)
3448 return ret; 3455 return ret;
3449 3456
3450 mutex_lock(&fs_info->volume_mutex); 3457again:
3458 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
3459 mutex_lock(&fs_info->volume_mutex);
3460 mutex_lock(&fs_info->balance_mutex);
3461 need_unlock = true;
3462 goto locked;
3463 }
3464
3465 /*
 3466 * mut. excl. ops lock is locked. Three possibilities:
3467 * (1) some other op is running
3468 * (2) balance is running
3469 * (3) balance is paused -- special case (think resume)
3470 */
3451 mutex_lock(&fs_info->balance_mutex); 3471 mutex_lock(&fs_info->balance_mutex);
3472 if (fs_info->balance_ctl) {
3473 /* this is either (2) or (3) */
3474 if (!atomic_read(&fs_info->balance_running)) {
3475 mutex_unlock(&fs_info->balance_mutex);
3476 if (!mutex_trylock(&fs_info->volume_mutex))
3477 goto again;
3478 mutex_lock(&fs_info->balance_mutex);
3479
3480 if (fs_info->balance_ctl &&
3481 !atomic_read(&fs_info->balance_running)) {
3482 /* this is (3) */
3483 need_unlock = false;
3484 goto locked;
3485 }
3486
3487 mutex_unlock(&fs_info->balance_mutex);
3488 mutex_unlock(&fs_info->volume_mutex);
3489 goto again;
3490 } else {
3491 /* this is (2) */
3492 mutex_unlock(&fs_info->balance_mutex);
3493 ret = -EINPROGRESS;
3494 goto out;
3495 }
3496 } else {
3497 /* this is (1) */
3498 mutex_unlock(&fs_info->balance_mutex);
3499 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3500 ret = -EINVAL;
3501 goto out;
3502 }
3503
3504locked:
3505 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
3452 3506
3453 if (arg) { 3507 if (arg) {
3454 bargs = memdup_user(arg, sizeof(*bargs)); 3508 bargs = memdup_user(arg, sizeof(*bargs));
3455 if (IS_ERR(bargs)) { 3509 if (IS_ERR(bargs)) {
3456 ret = PTR_ERR(bargs); 3510 ret = PTR_ERR(bargs);
3457 goto out; 3511 goto out_unlock;
3458 } 3512 }
3459 3513
3460 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3514 if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3474 bargs = NULL; 3528 bargs = NULL;
3475 } 3529 }
3476 3530
3477 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 3531 if (fs_info->balance_ctl) {
3478 1)) {
3479 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3480 ret = -EINPROGRESS; 3532 ret = -EINPROGRESS;
3481 goto out_bargs; 3533 goto out_bargs;
3482 } 3534 }
3483 need_to_clear_lock = 1;
3484 3535
3485 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3536 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3486 if (!bctl) { 3537 if (!bctl) {
@@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3501 } 3552 }
3502 3553
3503do_balance: 3554do_balance:
3504 ret = btrfs_balance(bctl, bargs);
3505 /* 3555 /*
3506 * bctl is freed in __cancel_balance or in free_fs_info if 3556 * Ownership of bctl and mutually_exclusive_operation_running
 3507 * restriper was paused all the way until unmount 3557 * goes to btrfs_balance. bctl is freed in __cancel_balance,
3558 * or, if restriper was paused all the way until unmount, in
3559 * free_fs_info. mutually_exclusive_operation_running is
3560 * cleared in __cancel_balance.
3508 */ 3561 */
3562 need_unlock = false;
3563
3564 ret = btrfs_balance(bctl, bargs);
3565
3509 if (arg) { 3566 if (arg) {
3510 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3567 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3511 ret = -EFAULT; 3568 ret = -EFAULT;
@@ -3513,12 +3570,12 @@ do_balance:
3513 3570
3514out_bargs: 3571out_bargs:
3515 kfree(bargs); 3572 kfree(bargs);
3516out: 3573out_unlock:
3517 if (need_to_clear_lock)
3518 atomic_set(&root->fs_info->mutually_exclusive_operation_running,
3519 0);
3520 mutex_unlock(&fs_info->balance_mutex); 3574 mutex_unlock(&fs_info->balance_mutex);
3521 mutex_unlock(&fs_info->volume_mutex); 3575 mutex_unlock(&fs_info->volume_mutex);
3576 if (need_unlock)
3577 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3578out:
3522 mnt_drop_write_file(file); 3579 mnt_drop_write_file(file);
3523 return ret; 3580 return ret;
3524} 3581}
@@ -3573,7 +3630,7 @@ out:
3573 3630
3574static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 3631static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3575{ 3632{
3576 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3633 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3577 struct btrfs_ioctl_quota_ctl_args *sa; 3634 struct btrfs_ioctl_quota_ctl_args *sa;
3578 struct btrfs_trans_handle *trans = NULL; 3635 struct btrfs_trans_handle *trans = NULL;
3579 int ret; 3636 int ret;
@@ -3632,7 +3689,7 @@ drop_write:
3632 3689
3633static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 3690static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
3634{ 3691{
3635 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3692 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3636 struct btrfs_ioctl_qgroup_assign_args *sa; 3693 struct btrfs_ioctl_qgroup_assign_args *sa;
3637 struct btrfs_trans_handle *trans; 3694 struct btrfs_trans_handle *trans;
3638 int ret; 3695 int ret;
@@ -3679,7 +3736,7 @@ drop_write:
3679 3736
3680static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 3737static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3681{ 3738{
3682 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3739 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3683 struct btrfs_ioctl_qgroup_create_args *sa; 3740 struct btrfs_ioctl_qgroup_create_args *sa;
3684 struct btrfs_trans_handle *trans; 3741 struct btrfs_trans_handle *trans;
3685 int ret; 3742 int ret;
@@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3698 goto drop_write; 3755 goto drop_write;
3699 } 3756 }
3700 3757
3758 if (!sa->qgroupid) {
3759 ret = -EINVAL;
3760 goto out;
3761 }
3762
3701 trans = btrfs_join_transaction(root); 3763 trans = btrfs_join_transaction(root);
3702 if (IS_ERR(trans)) { 3764 if (IS_ERR(trans)) {
3703 ret = PTR_ERR(trans); 3765 ret = PTR_ERR(trans);
@@ -3725,7 +3787,7 @@ drop_write:
3725 3787
3726static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 3788static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
3727{ 3789{
3728 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3790 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3729 struct btrfs_ioctl_qgroup_limit_args *sa; 3791 struct btrfs_ioctl_qgroup_limit_args *sa;
3730 struct btrfs_trans_handle *trans; 3792 struct btrfs_trans_handle *trans;
3731 int ret; 3793 int ret;
@@ -3775,7 +3837,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3775 void __user *arg) 3837 void __user *arg)
3776{ 3838{
3777 struct btrfs_ioctl_received_subvol_args *sa = NULL; 3839 struct btrfs_ioctl_received_subvol_args *sa = NULL;
3778 struct inode *inode = fdentry(file)->d_inode; 3840 struct inode *inode = file_inode(file);
3779 struct btrfs_root *root = BTRFS_I(inode)->root; 3841 struct btrfs_root *root = BTRFS_I(inode)->root;
3780 struct btrfs_root_item *root_item = &root->root_item; 3842 struct btrfs_root_item *root_item = &root->root_item;
3781 struct btrfs_trans_handle *trans; 3843 struct btrfs_trans_handle *trans;
@@ -3855,7 +3917,7 @@ out:
3855long btrfs_ioctl(struct file *file, unsigned int 3917long btrfs_ioctl(struct file *file, unsigned int
3856 cmd, unsigned long arg) 3918 cmd, unsigned long arg)
3857{ 3919{
3858 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3920 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3859 void __user *argp = (void __user *)arg; 3921 void __user *argp = (void __user *)arg;
3860 3922
3861 switch (cmd) { 3923 switch (cmd) {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
836 * if the disk i_size is already at the inode->i_size, or 836 * if the disk i_size is already at the inode->i_size, or
837 * this ordered extent is inside the disk i_size, we're done 837 * this ordered extent is inside the disk i_size, we're done
838 */ 838 */
839 if (disk_i_size == i_size || offset <= disk_i_size) { 839 if (disk_i_size == i_size)
840 goto out;
841
842 /*
843 * We still need to update disk_i_size if outstanding_isize is greater
844 * than disk_i_size.
845 */
846 if (offset <= disk_i_size &&
847 (!ordered || ordered->outstanding_isize <= disk_i_size))
840 goto out; 848 goto out;
841 }
842 849
843 /* 850 /*
844 * walk backward from this ordered extent to disk_i_size. 851 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
870 break; 877 break;
871 if (test->file_offset >= i_size) 878 if (test->file_offset >= i_size)
872 break; 879 break;
873 if (test->file_offset >= disk_i_size) { 880 if (entry_end(test) > disk_i_size) {
874 /* 881 /*
875 * we don't update disk_i_size now, so record this 882 * we don't update disk_i_size now, so record this
876 * undealt i_size. Or we will not know the real 883 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8e..a5c856234323 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
379 379
380 ret = add_relation_rb(fs_info, found_key.objectid, 380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset); 381 found_key.offset);
382 if (ret == -ENOENT) {
383 printk(KERN_WARNING
384 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 (unsigned long long)found_key.objectid,
386 (unsigned long long)found_key.offset);
387 ret = 0; /* ignore the error */
388 }
382 if (ret) 389 if (ret)
383 goto out; 390 goto out;
384next2: 391next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
956 struct btrfs_fs_info *fs_info, u64 qgroupid) 963 struct btrfs_fs_info *fs_info, u64 qgroupid)
957{ 964{
958 struct btrfs_root *quota_root; 965 struct btrfs_root *quota_root;
966 struct btrfs_qgroup *qgroup;
959 int ret = 0; 967 int ret = 0;
960 968
961 quota_root = fs_info->quota_root; 969 quota_root = fs_info->quota_root;
962 if (!quota_root) 970 if (!quota_root)
963 return -EINVAL; 971 return -EINVAL;
964 972
973 /* check if there are no relations to this qgroup */
974 spin_lock(&fs_info->qgroup_lock);
975 qgroup = find_qgroup_rb(fs_info, qgroupid);
976 if (qgroup) {
977 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 spin_unlock(&fs_info->qgroup_lock);
979 return -EBUSY;
980 }
981 }
982 spin_unlock(&fs_info->qgroup_lock);
983
965 ret = del_qgroup_item(trans, quota_root, qgroupid); 984 ret = del_qgroup_item(trans, quota_root, qgroupid);
966 985
967 spin_lock(&fs_info->qgroup_lock); 986 spin_lock(&fs_info->qgroup_lock);
968 del_qgroup_rb(quota_root->fs_info, qgroupid); 987 del_qgroup_rb(quota_root->fs_info, qgroupid);
969
970 spin_unlock(&fs_info->qgroup_lock); 988 spin_unlock(&fs_info->qgroup_lock);
971 989
972 return ret; 990 return ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..17c306bf177a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3472,7 +3472,7 @@ out:
3472} 3472}
3473 3473
3474/* 3474/*
3475 * hepler to find all tree blocks that reference a given data extent 3475 * helper to find all tree blocks that reference a given data extent
3476 */ 3476 */
3477static noinline_for_stack 3477static noinline_for_stack
3478int add_data_references(struct reloc_control *rc, 3478int add_data_references(struct reloc_control *rc,
@@ -3566,7 +3566,7 @@ int add_data_references(struct reloc_control *rc,
3566} 3566}
3567 3567
3568/* 3568/*
3569 * hepler to find next unprocessed extent 3569 * helper to find next unprocessed extent
3570 */ 3570 */
3571static noinline_for_stack 3571static noinline_for_stack
3572int find_next_extent(struct btrfs_trans_handle *trans, 3572int find_next_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
580 int corrected = 0; 580 int corrected = 0;
581 struct btrfs_key key; 581 struct btrfs_key key;
582 struct inode *inode = NULL; 582 struct inode *inode = NULL;
583 struct btrfs_fs_info *fs_info;
583 u64 end = offset + PAGE_SIZE - 1; 584 u64 end = offset + PAGE_SIZE - 1;
584 struct btrfs_root *local_root; 585 struct btrfs_root *local_root;
586 int srcu_index;
585 587
586 key.objectid = root; 588 key.objectid = root;
587 key.type = BTRFS_ROOT_ITEM_KEY; 589 key.type = BTRFS_ROOT_ITEM_KEY;
588 key.offset = (u64)-1; 590 key.offset = (u64)-1;
589 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); 591
590 if (IS_ERR(local_root)) 592 fs_info = fixup->root->fs_info;
593 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
594
595 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
596 if (IS_ERR(local_root)) {
597 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
591 return PTR_ERR(local_root); 598 return PTR_ERR(local_root);
599 }
592 600
593 key.type = BTRFS_INODE_ITEM_KEY; 601 key.type = BTRFS_INODE_ITEM_KEY;
594 key.objectid = inum; 602 key.objectid = inum;
595 key.offset = 0; 603 key.offset = 0;
596 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); 604 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
605 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
597 if (IS_ERR(inode)) 606 if (IS_ERR(inode))
598 return PTR_ERR(inode); 607 return PTR_ERR(inode);
599 608
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
606 } 615 }
607 616
608 if (PageUptodate(page)) { 617 if (PageUptodate(page)) {
609 struct btrfs_fs_info *fs_info;
610 if (PageDirty(page)) { 618 if (PageDirty(page)) {
611 /* 619 /*
612 * we need to write the data to the defect sector. the 620 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3180 u64 physical_for_dev_replace; 3188 u64 physical_for_dev_replace;
3181 u64 len; 3189 u64 len;
3182 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3183 3192
3184 key.objectid = root; 3193 key.objectid = root;
3185 key.type = BTRFS_ROOT_ITEM_KEY; 3194 key.type = BTRFS_ROOT_ITEM_KEY;
3186 key.offset = (u64)-1; 3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3187 local_root = btrfs_read_fs_root_no_name(fs_info, &key); 3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3188 if (IS_ERR(local_root)) 3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3189 return PTR_ERR(local_root); 3202 return PTR_ERR(local_root);
3203 }
3190 3204
3191 key.type = BTRFS_INODE_ITEM_KEY; 3205 key.type = BTRFS_INODE_ITEM_KEY;
3192 key.objectid = inum; 3206 key.objectid = inum;
3193 key.offset = 0; 3207 key.offset = 0;
3194 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3195 if (IS_ERR(inode)) 3210 if (IS_ERR(inode))
3196 return PTR_ERR(inode); 3211 return PTR_ERR(inode);
3197 3212
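Both scrub paths above now resolve the subvolume root under the subvol_srcu read-side lock, so the root cannot be dropped between looking it up and grabbing the inode that pins it. The shape of the fix, using the calls from the hunks:

        srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

        local_root = btrfs_read_fs_root_no_name(fs_info, &key);
        if (IS_ERR(local_root)) {
                srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
                return PTR_ERR(local_root);
        }

        key.type = BTRFS_INODE_ITEM_KEY;
        key.objectid = inum;
        key.offset = 0;
        inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
        srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);    /* inode now holds its own reference */
        if (IS_ERR(inode))
                return PTR_ERR(inode);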
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..f4ab7a9260eb 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
1814 (unsigned long)nce->ino); 1814 (unsigned long)nce->ino);
1815 if (!nce_head) { 1815 if (!nce_head) {
1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817 if (!nce_head) 1817 if (!nce_head) {
1818 kfree(nce);
1818 return -ENOMEM; 1819 return -ENOMEM;
1820 }
1819 INIT_LIST_HEAD(nce_head); 1821 INIT_LIST_HEAD(nce_head);
1820 1822
1821 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1823 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
@@ -4542,7 +4544,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4542 if (!capable(CAP_SYS_ADMIN)) 4544 if (!capable(CAP_SYS_ADMIN))
4543 return -EPERM; 4545 return -EPERM;
4544 4546
4545 send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; 4547 send_root = BTRFS_I(file_inode(mnt_file))->root;
4546 fs_info = send_root->fs_info; 4548 fs_info = send_root->fs_info;
4547 4549
4548 arg = memdup_user(arg_, sizeof(*arg)); 4550 arg = memdup_user(arg_, sizeof(*arg));
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86c..d8982e9601d3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
267 function, line, errstr); 267 function, line, errstr);
268 return; 268 return;
269 } 269 }
270 trans->transaction->aborted = errno; 270 ACCESS_ONCE(trans->transaction->aborted) = errno;
271 __btrfs_std_error(root->fs_info, function, line, errno, NULL); 271 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
272} 272}
273/* 273/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea5..4c0067c4f76d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -112,7 +112,6 @@ loop:
112 * to redo the trans_no_join checks above 112 * to redo the trans_no_join checks above
113 */ 113 */
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 114 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction;
116 goto loop; 115 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
118 spin_unlock(&fs_info->trans_lock); 117 spin_unlock(&fs_info->trans_lock);
@@ -333,12 +332,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
333 &root->fs_info->trans_block_rsv, 332 &root->fs_info->trans_block_rsv,
334 num_bytes, flush); 333 num_bytes, flush);
335 if (ret) 334 if (ret)
336 return ERR_PTR(ret); 335 goto reserve_fail;
337 } 336 }
338again: 337again:
339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 338 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
340 if (!h) 339 if (!h) {
341 return ERR_PTR(-ENOMEM); 340 ret = -ENOMEM;
341 goto alloc_fail;
342 }
342 343
343 /* 344 /*
344 * If we are JOIN_NOLOCK we're already committing a transaction and 345 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +366,7 @@ again:
365 if (ret < 0) { 366 if (ret < 0) {
366 /* We must get the transaction if we are JOIN_NOLOCK. */ 367 /* We must get the transaction if we are JOIN_NOLOCK. */
367 BUG_ON(type == TRANS_JOIN_NOLOCK); 368 BUG_ON(type == TRANS_JOIN_NOLOCK);
368 369 goto join_fail;
369 if (type < TRANS_JOIN_NOLOCK)
370 sb_end_intwrite(root->fs_info->sb);
371 kmem_cache_free(btrfs_trans_handle_cachep, h);
372 return ERR_PTR(ret);
373 } 370 }
374 371
375 cur_trans = root->fs_info->running_transaction; 372 cur_trans = root->fs_info->running_transaction;
@@ -410,6 +407,19 @@ got_it:
410 if (!current->journal_info && type != TRANS_USERSPACE) 407 if (!current->journal_info && type != TRANS_USERSPACE)
411 current->journal_info = h; 408 current->journal_info = h;
412 return h; 409 return h;
410
411join_fail:
412 if (type < TRANS_JOIN_NOLOCK)
413 sb_end_intwrite(root->fs_info->sb);
414 kmem_cache_free(btrfs_trans_handle_cachep, h);
415alloc_fail:
416 if (num_bytes)
417 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
418 num_bytes);
419reserve_fail:
420 if (qgroup_reserved)
421 btrfs_qgroup_free(root, qgroup_reserved);
422 return ERR_PTR(ret);
413} 423}
414 424
415struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 425struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
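start_transaction() now unwinds its setup in the conventional staged-goto style: each step that can fail jumps to a label that releases only what was acquired before it, with the labels falling through in reverse order. Schematically (hypothetical helper names; the real calls are the block reservation, handle allocation and join shown above):

        ret = take_reservation(root, num_bytes);
        if (ret)
                goto reserve_fail;

        h = alloc_handle();
        if (!h) {
                ret = -ENOMEM;
                goto alloc_fail;
        }

        ret = join_transaction(root, type);
        if (ret < 0)
                goto join_fail;

        return h;                       /* success: nothing to undo */

join_fail:
        free_handle(h);
alloc_fail:
        release_reservation(root, num_bytes);
reserve_fail:
        release_qgroup_reservation(root, qgroup_reserved);
        return ERR_PTR(ret);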
@@ -1468,7 +1478,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1468 goto cleanup_transaction; 1478 goto cleanup_transaction;
1469 } 1479 }
1470 1480
1471 if (cur_trans->aborted) { 1481 /* Stop the commit early if ->aborted is set */
1482 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1472 ret = cur_trans->aborted; 1483 ret = cur_trans->aborted;
1473 goto cleanup_transaction; 1484 goto cleanup_transaction;
1474 } 1485 }
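The commit path now reads ->aborted through ACCESS_ONCE() at every check (and writes it the same way in __btrfs_abort_transaction above), so the compiler cannot cache the flag in a register across the long stretches between checks. Assuming the macro's usual definition, the pattern is:

        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

        /* each check really re-reads memory, even after earlier reads */
        if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
                ret = cur_trans->aborted;
                goto cleanup_transaction;
        }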
@@ -1574,6 +1585,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1574 wait_event(cur_trans->writer_wait, 1585 wait_event(cur_trans->writer_wait,
1575 atomic_read(&cur_trans->num_writers) == 1); 1586 atomic_read(&cur_trans->num_writers) == 1);
1576 1587
1588 /* ->aborted might be set after the previous check, so check it */
1589 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1590 ret = cur_trans->aborted;
1591 goto cleanup_transaction;
1592 }
1577 /* 1593 /*
1578 * the reloc mutex makes sure that we stop 1594 * the reloc mutex makes sure that we stop
1579 * the balancing code from coming in and moving 1595 * the balancing code from coming in and moving
@@ -1657,6 +1673,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1657 goto cleanup_transaction; 1673 goto cleanup_transaction;
1658 } 1674 }
1659 1675
1676 /*
1677 * The tasks which save the space cache and inode cache may also
1678 * update ->aborted, check it.
1679 */
1680 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1681 ret = cur_trans->aborted;
1682 mutex_unlock(&root->fs_info->tree_log_mutex);
1683 mutex_unlock(&root->fs_info->reloc_mutex);
1684 goto cleanup_transaction;
1685 }
1686
1660 btrfs_prepare_extent_commit(trans, root); 1687 btrfs_prepare_extent_commit(trans, root);
1661 1688
1662 cur_trans = root->fs_info->running_transaction; 1689 cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d4..9027bb1e7466 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3357 if (skip_csum) 3357 if (skip_csum)
3358 return 0; 3358 return 0;
3359 3359
3360 if (em->compress_type) {
3361 csum_offset = 0;
3362 csum_len = block_len;
3363 }
3364
3360 /* block start is already adjusted for the file extent offset. */ 3365 /* block start is already adjusted for the file extent offset. */
3361 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3362 em->block_start + csum_offset, 3367 em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3410 em = list_entry(extents.next, struct extent_map, list); 3415 em = list_entry(extents.next, struct extent_map, list);
3411 3416
3412 list_del_init(&em->list); 3417 list_del_init(&em->list);
3413 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3414 3418
3415 /* 3419 /*
3416 * If we had an error we just need to delete everybody from our 3420 * If we had an error we just need to delete everybody from our
3417 * private list. 3421 * private list.
3418 */ 3422 */
3419 if (ret) { 3423 if (ret) {
3424 clear_em_logging(tree, em);
3420 free_extent_map(em); 3425 free_extent_map(em);
3421 continue; 3426 continue;
3422 } 3427 }
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3424 write_unlock(&tree->lock); 3429 write_unlock(&tree->lock);
3425 3430
3426 ret = log_one_extent(trans, inode, root, em, path); 3431 ret = log_one_extent(trans, inode, root, em, path);
3427 free_extent_map(em);
3428 write_lock(&tree->lock); 3432 write_lock(&tree->lock);
3433 clear_em_logging(tree, em);
3434 free_extent_map(em);
3429 } 3435 }
3430 WARN_ON(!list_empty(&extents)); 3436 WARN_ON(!list_empty(&extents));
3431 write_unlock(&tree->lock); 3437 write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa74012..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1431 } 1431 }
1432 } else { 1432 } else {
1433 ret = btrfs_get_bdev_and_sb(device_path, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1434 FMODE_READ | FMODE_EXCL, 1434 FMODE_WRITE | FMODE_EXCL,
1435 root->fs_info->bdev_holder, 0, 1435 root->fs_info->bdev_holder, 0,
1436 &bdev, &bh); 1436 &bdev, &bh);
1437 if (ret) 1437 if (ret)
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1556 ret = 0; 1556 ret = 0;
1557 1557
1558 /* Notify udev that device has changed */ 1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE); 1559 if (bdev)
1560 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560 1561
1561error_brelse: 1562error_brelse:
1562 brelse(bh); 1563 brelse(bh);
@@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2614 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2615 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2615 chunk_used = btrfs_block_group_used(&cache->item); 2616 chunk_used = btrfs_block_group_used(&cache->item);
2616 2617
2617 user_thresh = div_factor_fine(cache->key.offset, bargs->usage); 2618 if (bargs->usage == 0)
2619 user_thresh = 0;
2620 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset;
2622 else
2623 user_thresh = div_factor_fine(cache->key.offset,
2624 bargs->usage);
2625
2618 if (chunk_used < user_thresh) 2626 if (chunk_used < user_thresh)
2619 ret = 0; 2627 ret = 0;
2620 2628
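chunk_usage_filter() keeps a chunk for relocation only if its used bytes stay under a threshold derived from the balance "usage" argument. div_factor_fine(len, factor) is essentially len * factor / 100, so the two degenerate inputs are now special-cased instead of being fed into that division. A worked example under that assumption:

        u64 len = 1024ULL * 1024 * 1024;        /* a 1 GiB chunk */
        u64 user_thresh;

        user_thresh = div_factor_fine(len, 10); /* usage=10: 107374182 bytes, ~102 MiB */
        /* usage == 0   is clamped to 0   -> only completely empty chunks pass */
        /* usage  > 100 is clamped to len -> every chunk passes                */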
@@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
2959 unset_balance_control(fs_info); 2967 unset_balance_control(fs_info);
2960 ret = del_balance_item(fs_info->tree_root); 2968 ret = del_balance_item(fs_info->tree_root);
2961 BUG_ON(ret); 2969 BUG_ON(ret);
2970
2971 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2962} 2972}
2963 2973
2964void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 2974void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138out: 3148out:
3139 if (bctl->flags & BTRFS_BALANCE_RESUME) 3149 if (bctl->flags & BTRFS_BALANCE_RESUME)
3140 __cancel_balance(fs_info); 3150 __cancel_balance(fs_info);
3141 else 3151 else {
3142 kfree(bctl); 3152 kfree(bctl);
3153 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3154 }
3143 return ret; 3155 return ret;
3144} 3156}
3145 3157
@@ -3156,7 +3168,6 @@ static int balance_kthread(void *data)
3156 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3168 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3157 } 3169 }
3158 3170
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3160 mutex_unlock(&fs_info->balance_mutex); 3171 mutex_unlock(&fs_info->balance_mutex);
3161 mutex_unlock(&fs_info->volume_mutex); 3172 mutex_unlock(&fs_info->volume_mutex);
3162 3173
@@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3179 return 0; 3190 return 0;
3180 } 3191 }
3181 3192
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3193 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
3184 if (IS_ERR(tsk)) 3194 if (IS_ERR(tsk))
3185 return PTR_ERR(tsk); 3195 return PTR_ERR(tsk);
@@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3233 btrfs_balance_sys(leaf, item, &disk_bargs); 3243 btrfs_balance_sys(leaf, item, &disk_bargs);
3234 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 3244 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
3235 3245
3246 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3247
3236 mutex_lock(&fs_info->volume_mutex); 3248 mutex_lock(&fs_info->volume_mutex);
3237 mutex_lock(&fs_info->balance_mutex); 3249 mutex_lock(&fs_info->balance_mutex);
3238 3250
@@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3509 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ }, 3511 { 1, 1, 1, 1, 1, 1 /* single */ },
3500}; 3512};
3501 3513
3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/buffer.c b/fs/buffer.c
index c017a2dfb909..b4dcb34c9635 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h> 43#include <linux/bit_spinlock.h>
44#include <trace/events/block.h>
44 45
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46 47
@@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
53} 54}
54EXPORT_SYMBOL(init_buffer); 55EXPORT_SYMBOL(init_buffer);
55 56
57inline void touch_buffer(struct buffer_head *bh)
58{
59 trace_block_touch_buffer(bh);
60 mark_page_accessed(bh->b_page);
61}
62EXPORT_SYMBOL(touch_buffer);
63
56static int sleep_on_buffer(void *word) 64static int sleep_on_buffer(void *word)
57{ 65{
58 io_schedule(); 66 io_schedule();
@@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh)
1113{ 1121{
1114 WARN_ON_ONCE(!buffer_uptodate(bh)); 1122 WARN_ON_ONCE(!buffer_uptodate(bh));
1115 1123
1124 trace_block_dirty_buffer(bh);
1125
1116 /* 1126 /*
1117 * Very *carefully* optimize the it-is-already-dirty case. 1127 * Very *carefully* optimize the it-is-already-dirty case.
1118 * 1128 *
@@ -2332,7 +2342,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2332 get_block_t get_block) 2342 get_block_t get_block)
2333{ 2343{
2334 struct page *page = vmf->page; 2344 struct page *page = vmf->page;
2335 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 2345 struct inode *inode = file_inode(vma->vm_file);
2336 unsigned long end; 2346 unsigned long end;
2337 loff_t size; 2347 loff_t size;
2338 int ret; 2348 int ret;
@@ -2359,7 +2369,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2369 if (unlikely(ret < 0))
2360 goto out_unlock; 2370 goto out_unlock;
2361 set_page_dirty(page); 2371 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2372 wait_for_stable_page(page);
2363 return 0; 2373 return 0;
2364out_unlock: 2374out_unlock:
2365 unlock_page(page); 2375 unlock_page(page);
@@ -2371,7 +2381,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2371 get_block_t get_block) 2381 get_block_t get_block)
2372{ 2382{
2373 int ret; 2383 int ret;
2374 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; 2384 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2375 2385
2376 sb_start_pagefault(sb); 2386 sb_start_pagefault(sb);
2377 2387
@@ -2935,6 +2945,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2935 void *kaddr = kmap_atomic(bh->b_page); 2945 void *kaddr = kmap_atomic(bh->b_page);
2936 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); 2946 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
2937 kunmap_atomic(kaddr); 2947 kunmap_atomic(kaddr);
2948 flush_dcache_page(bh->b_page);
2938 } 2949 }
2939} 2950}
2940 2951
@@ -3226,7 +3237,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
3226 * Once the number of bh's in the machine exceeds this level, we start 3237 * Once the number of bh's in the machine exceeds this level, we start
3227 * stripping them in writeback. 3238 * stripping them in writeback.
3228 */ 3239 */
3229static int max_buffer_heads; 3240static unsigned long max_buffer_heads;
3230 3241
3231int buffer_heads_over_limit; 3242int buffer_heads_over_limit;
3232 3243
@@ -3342,7 +3353,7 @@ EXPORT_SYMBOL(bh_submit_read);
3342 3353
3343void __init buffer_init(void) 3354void __init buffer_init(void)
3344{ 3355{
3345 int nrpages; 3356 unsigned long nrpages;
3346 3357
3347 bh_cachep = kmem_cache_create("buffer_head", 3358 bh_cachep = kmem_cache_create("buffer_head",
3348 sizeof(struct buffer_head), 0, 3359 sizeof(struct buffer_head), 0,
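The final buffer.c hunks widen max_buffer_heads and the nrpages temporary in buffer_init() from int to unsigned long. The limit is computed as roughly 10% of buffer-cache pages times the number of buffer_heads per page, which can overflow a 32-bit int on very large machines; a rough userspace check of that arithmetic (the 16 TiB figure and the ~104-byte buffer_head size are assumptions for illustration):

#include <stdio.h>
#include <limits.h>

int main(void)
{
    /* hypothetical 16 TiB of lowmem with 4 KiB pages -- illustrative numbers only */
    unsigned long long total_pages = (16ULL << 40) / 4096;
    unsigned long long nrpages = total_pages * 10 / 100;   /* ~10%, as in buffer_init() */
    unsigned long long per_page = 4096 / 104;              /* assumed sizeof(struct buffer_head) */

    unsigned long long limit = nrpages * per_page;
    printf("buffer_heads limit: %llu\n", limit);
    printf("fits in int? %s (INT_MAX = %d)\n",
           limit <= INT_MAX ? "yes" : "no", INT_MAX);
    return 0;
}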
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
1config CEPH_FS 1config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select CEPH_LIB 4 select CEPH_LIB
5 select LIBCRC32C 5 select LIBCRC32C
6 select CRYPTO_AES 6 select CRYPTO_AES
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 064d1a68d2c1..a60ea977af6f 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -195,7 +195,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
195 */ 195 */
196static int readpage_nounlock(struct file *filp, struct page *page) 196static int readpage_nounlock(struct file *filp, struct page *page)
197{ 197{
198 struct inode *inode = filp->f_dentry->d_inode; 198 struct inode *inode = file_inode(filp);
199 struct ceph_inode_info *ci = ceph_inode(inode); 199 struct ceph_inode_info *ci = ceph_inode(inode);
200 struct ceph_osd_client *osdc = 200 struct ceph_osd_client *osdc =
201 &ceph_inode_to_client(inode)->client->osdc; 201 &ceph_inode_to_client(inode)->client->osdc;
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
236static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) 236static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
237{ 237{
238 struct inode *inode = req->r_inode; 238 struct inode *inode = req->r_inode;
239 struct ceph_osd_reply_head *replyhead; 239 int rc = req->r_result;
240 int rc, bytes; 240 int bytes = le32_to_cpu(msg->hdr.data_len);
241 int i; 241 int i;
242 242
243 /* parse reply */
244 replyhead = msg->front.iov_base;
245 WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
246 rc = le32_to_cpu(replyhead->result);
247 bytes = le32_to_cpu(msg->hdr.data_len);
248
249 dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); 243 dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
250 244
251 /* unlock all pages, zeroing any data we didn't read */ 245 /* unlock all pages, zeroing any data we didn't read */
@@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
315 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 309 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
316 NULL, 0, 310 NULL, 0,
317 ci->i_truncate_seq, ci->i_truncate_size, 311 ci->i_truncate_seq, ci->i_truncate_size,
318 NULL, false, 1, 0); 312 NULL, false, 0);
319 if (IS_ERR(req)) 313 if (IS_ERR(req))
320 return PTR_ERR(req); 314 return PTR_ERR(req);
321 315
@@ -370,7 +364,7 @@ out:
370static int ceph_readpages(struct file *file, struct address_space *mapping, 364static int ceph_readpages(struct file *file, struct address_space *mapping,
371 struct list_head *page_list, unsigned nr_pages) 365 struct list_head *page_list, unsigned nr_pages)
372{ 366{
373 struct inode *inode = file->f_dentry->d_inode; 367 struct inode *inode = file_inode(file);
374 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 368 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
375 int rc = 0; 369 int rc = 0;
376 int max = 0; 370 int max = 0;
@@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
492 &ci->i_layout, snapc, 486 &ci->i_layout, snapc,
493 page_off, len, 487 page_off, len,
494 ci->i_truncate_seq, ci->i_truncate_size, 488 ci->i_truncate_seq, ci->i_truncate_size,
495 &inode->i_mtime, 489 &inode->i_mtime, &page, 1);
496 &page, 1, 0, 0, true);
497 if (err < 0) { 490 if (err < 0) {
498 dout("writepage setting page/mapping error %d %p\n", err, page); 491 dout("writepage setting page/mapping error %d %p\n", err, page);
499 SetPageError(page); 492 SetPageError(page);
@@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
554 struct ceph_msg *msg) 547 struct ceph_msg *msg)
555{ 548{
556 struct inode *inode = req->r_inode; 549 struct inode *inode = req->r_inode;
557 struct ceph_osd_reply_head *replyhead;
558 struct ceph_osd_op *op;
559 struct ceph_inode_info *ci = ceph_inode(inode); 550 struct ceph_inode_info *ci = ceph_inode(inode);
560 unsigned wrote; 551 unsigned wrote;
561 struct page *page; 552 struct page *page;
562 int i; 553 int i;
563 struct ceph_snap_context *snapc = req->r_snapc; 554 struct ceph_snap_context *snapc = req->r_snapc;
564 struct address_space *mapping = inode->i_mapping; 555 struct address_space *mapping = inode->i_mapping;
565 __s32 rc = -EIO; 556 int rc = req->r_result;
566 u64 bytes = 0; 557 u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
567 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 558 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
568 long writeback_stat; 559 long writeback_stat;
569 unsigned issued = ceph_caps_issued(ci); 560 unsigned issued = ceph_caps_issued(ci);
570 561
571 /* parse reply */
572 replyhead = msg->front.iov_base;
573 WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
574 op = (void *)(replyhead + 1);
575 rc = le32_to_cpu(replyhead->result);
576 bytes = le64_to_cpu(op->extent.length);
577
578 if (rc >= 0) { 562 if (rc >= 0) {
579 /* 563 /*
580 * Assume we wrote the pages we originally sent. The 564 * Assume we wrote the pages we originally sent. The
@@ -741,8 +725,6 @@ retry:
741 struct page *page; 725 struct page *page;
742 int want; 726 int want;
743 u64 offset, len; 727 u64 offset, len;
744 struct ceph_osd_request_head *reqhead;
745 struct ceph_osd_op *op;
746 long writeback_stat; 728 long writeback_stat;
747 729
748 next = 0; 730 next = 0;
@@ -838,7 +820,7 @@ get_more_pages:
838 snapc, do_sync, 820 snapc, do_sync,
839 ci->i_truncate_seq, 821 ci->i_truncate_seq,
840 ci->i_truncate_size, 822 ci->i_truncate_size,
841 &inode->i_mtime, true, 1, 0); 823 &inode->i_mtime, true, 0);
842 824
843 if (IS_ERR(req)) { 825 if (IS_ERR(req)) {
844 rc = PTR_ERR(req); 826 rc = PTR_ERR(req);
@@ -906,10 +888,8 @@ get_more_pages:
906 888
907 /* revise final length, page count */ 889 /* revise final length, page count */
908 req->r_num_pages = locked_pages; 890 req->r_num_pages = locked_pages;
909 reqhead = req->r_request->front.iov_base; 891 req->r_request_ops[0].extent.length = cpu_to_le64(len);
910 op = (void *)(reqhead + 1); 892 req->r_request_ops[0].payload_len = cpu_to_le32(len);
911 op->extent.length = cpu_to_le64(len);
912 op->payload_len = cpu_to_le32(len);
913 req->r_request->hdr.data_len = cpu_to_le32(len); 893 req->r_request->hdr.data_len = cpu_to_le32(len);
914 894
915 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); 895 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
@@ -977,7 +957,7 @@ static int ceph_update_writeable_page(struct file *file,
977 loff_t pos, unsigned len, 957 loff_t pos, unsigned len,
978 struct page *page) 958 struct page *page)
979{ 959{
980 struct inode *inode = file->f_dentry->d_inode; 960 struct inode *inode = file_inode(file);
981 struct ceph_inode_info *ci = ceph_inode(inode); 961 struct ceph_inode_info *ci = ceph_inode(inode);
982 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 962 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
983 loff_t page_off = pos & PAGE_CACHE_MASK; 963 loff_t page_off = pos & PAGE_CACHE_MASK;
@@ -1086,7 +1066,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1086 loff_t pos, unsigned len, unsigned flags, 1066 loff_t pos, unsigned len, unsigned flags,
1087 struct page **pagep, void **fsdata) 1067 struct page **pagep, void **fsdata)
1088{ 1068{
1089 struct inode *inode = file->f_dentry->d_inode; 1069 struct inode *inode = file_inode(file);
1090 struct ceph_inode_info *ci = ceph_inode(inode); 1070 struct ceph_inode_info *ci = ceph_inode(inode);
1091 struct ceph_file_info *fi = file->private_data; 1071 struct ceph_file_info *fi = file->private_data;
1092 struct page *page; 1072 struct page *page;
@@ -1144,7 +1124,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1144 loff_t pos, unsigned len, unsigned copied, 1124 loff_t pos, unsigned len, unsigned copied,
1145 struct page *page, void *fsdata) 1125 struct page *page, void *fsdata)
1146{ 1126{
1147 struct inode *inode = file->f_dentry->d_inode; 1127 struct inode *inode = file_inode(file);
1148 struct ceph_inode_info *ci = ceph_inode(inode); 1128 struct ceph_inode_info *ci = ceph_inode(inode);
1149 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 1129 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1150 struct ceph_mds_client *mdsc = fsc->mdsc; 1130 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1228,7 +1208,7 @@ const struct address_space_operations ceph_aops = {
1228 */ 1208 */
1229static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1209static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1230{ 1210{
1231 struct inode *inode = vma->vm_file->f_dentry->d_inode; 1211 struct inode *inode = file_inode(vma->vm_file);
1232 struct page *page = vmf->page; 1212 struct page *page = vmf->page;
1233 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1213 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1234 loff_t off = page_offset(page); 1214 loff_t off = page_offset(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a1d9bb30c1bf..78e2f575247d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -611,8 +611,16 @@ retry:
611 611
612 if (flags & CEPH_CAP_FLAG_AUTH) 612 if (flags & CEPH_CAP_FLAG_AUTH)
613 ci->i_auth_cap = cap; 613 ci->i_auth_cap = cap;
614 else if (ci->i_auth_cap == cap) 614 else if (ci->i_auth_cap == cap) {
615 ci->i_auth_cap = NULL; 615 ci->i_auth_cap = NULL;
616 spin_lock(&mdsc->cap_dirty_lock);
617 if (!list_empty(&ci->i_dirty_item)) {
618 dout(" moving %p to cap_dirty_migrating\n", inode);
619 list_move(&ci->i_dirty_item,
620 &mdsc->cap_dirty_migrating);
621 }
622 spin_unlock(&mdsc->cap_dirty_lock);
623 }
616 624
617 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", 625 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
618 inode, ceph_vinop(inode), cap, ceph_cap_string(issued), 626 inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
@@ -930,7 +938,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
930 u64 size, u64 max_size, 938 u64 size, u64 max_size,
931 struct timespec *mtime, struct timespec *atime, 939 struct timespec *mtime, struct timespec *atime,
932 u64 time_warp_seq, 940 u64 time_warp_seq,
933 uid_t uid, gid_t gid, umode_t mode, 941 kuid_t uid, kgid_t gid, umode_t mode,
934 u64 xattr_version, 942 u64 xattr_version,
935 struct ceph_buffer *xattrs_buf, 943 struct ceph_buffer *xattrs_buf,
936 u64 follows) 944 u64 follows)
@@ -974,8 +982,8 @@ static int send_cap_msg(struct ceph_mds_session *session,
974 ceph_encode_timespec(&fc->atime, atime); 982 ceph_encode_timespec(&fc->atime, atime);
975 fc->time_warp_seq = cpu_to_le32(time_warp_seq); 983 fc->time_warp_seq = cpu_to_le32(time_warp_seq);
976 984
977 fc->uid = cpu_to_le32(uid); 985 fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
978 fc->gid = cpu_to_le32(gid); 986 fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
979 fc->mode = cpu_to_le32(mode); 987 fc->mode = cpu_to_le32(mode);
980 988
981 fc->xattr_version = cpu_to_le64(xattr_version); 989 fc->xattr_version = cpu_to_le64(xattr_version);
@@ -1081,8 +1089,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1081 struct timespec mtime, atime; 1089 struct timespec mtime, atime;
1082 int wake = 0; 1090 int wake = 0;
1083 umode_t mode; 1091 umode_t mode;
1084 uid_t uid; 1092 kuid_t uid;
1085 gid_t gid; 1093 kgid_t gid;
1086 struct ceph_mds_session *session; 1094 struct ceph_mds_session *session;
1087 u64 xattr_version = 0; 1095 u64 xattr_version = 0;
1088 struct ceph_buffer *xattr_blob = NULL; 1096 struct ceph_buffer *xattr_blob = NULL;
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1460 struct ceph_mds_client *mdsc = fsc->mdsc; 1468 struct ceph_mds_client *mdsc = fsc->mdsc;
1461 struct inode *inode = &ci->vfs_inode; 1469 struct inode *inode = &ci->vfs_inode;
1462 struct ceph_cap *cap; 1470 struct ceph_cap *cap;
1463 int file_wanted, used; 1471 int file_wanted, used, cap_used;
1464 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ 1472 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
1465 int issued, implemented, want, retain, revoking, flushing = 0; 1473 int issued, implemented, want, retain, revoking, flushing = 0;
1466 int mds = -1; /* keep track of how far we've gone through i_caps list 1474 int mds = -1; /* keep track of how far we've gone through i_caps list
@@ -1563,9 +1571,14 @@ retry_locked:
1563 1571
1564 /* NOTE: no side-effects allowed, until we take s_mutex */ 1572 /* NOTE: no side-effects allowed, until we take s_mutex */
1565 1573
1574 cap_used = used;
1575 if (ci->i_auth_cap && cap != ci->i_auth_cap)
1576 cap_used &= ~ci->i_auth_cap->issued;
1577
1566 revoking = cap->implemented & ~cap->issued; 1578 revoking = cap->implemented & ~cap->issued;
1567 dout(" mds%d cap %p issued %s implemented %s revoking %s\n", 1579 dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
1568 cap->mds, cap, ceph_cap_string(cap->issued), 1580 cap->mds, cap, ceph_cap_string(cap->issued),
1581 ceph_cap_string(cap_used),
1569 ceph_cap_string(cap->implemented), 1582 ceph_cap_string(cap->implemented),
1570 ceph_cap_string(revoking)); 1583 ceph_cap_string(revoking));
1571 1584
@@ -1593,7 +1606,7 @@ retry_locked:
1593 } 1606 }
1594 1607
1595 /* completed revocation? going down and there are no caps? */ 1608 /* completed revocation? going down and there are no caps? */
1596 if (revoking && (revoking & used) == 0) { 1609 if (revoking && (revoking & cap_used) == 0) {
1597 dout("completed revocation of %s\n", 1610 dout("completed revocation of %s\n",
1598 ceph_cap_string(cap->implemented & ~cap->issued)); 1611 ceph_cap_string(cap->implemented & ~cap->issued));
1599 goto ack; 1612 goto ack;
@@ -1670,8 +1683,8 @@ ack:
1670 sent++; 1683 sent++;
1671 1684
1672 /* __send_cap drops i_ceph_lock */ 1685 /* __send_cap drops i_ceph_lock */
1673 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1686 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
1674 retain, flushing, NULL); 1687 want, retain, flushing, NULL);
1675 goto retry; /* retake i_ceph_lock and restart our cap scan. */ 1688 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1676 } 1689 }
1677 1690
@@ -2359,10 +2372,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 2372
2360 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 2373 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
2361 inode->i_mode = le32_to_cpu(grant->mode); 2374 inode->i_mode = le32_to_cpu(grant->mode);
2362 inode->i_uid = le32_to_cpu(grant->uid); 2375 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
2363 inode->i_gid = le32_to_cpu(grant->gid); 2376 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
2364 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 2377 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
2365 inode->i_uid, inode->i_gid); 2378 from_kuid(&init_user_ns, inode->i_uid),
2379 from_kgid(&init_user_ns, inode->i_gid));
2366 } 2380 }
2367 2381
2368 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2382 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -2416,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2416 dout("mds wanted %s -> %s\n", 2430 dout("mds wanted %s -> %s\n",
2417 ceph_cap_string(le32_to_cpu(grant->wanted)), 2431 ceph_cap_string(le32_to_cpu(grant->wanted)),
2418 ceph_cap_string(wanted)); 2432 ceph_cap_string(wanted));
2419 grant->wanted = cpu_to_le32(wanted); 2433 /* imported cap may not have correct mds_wanted */
2434 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
2435 check_caps = 1;
2420 } 2436 }
2421 2437
2422 cap->seq = seq; 2438 cap->seq = seq;
@@ -2820,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2820 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, 2836 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
2821 (unsigned)seq); 2837 (unsigned)seq);
2822 2838
2839 if (op == CEPH_CAP_OP_IMPORT)
2840 ceph_add_cap_releases(mdsc, session);
2841
2823 /* lookup ino */ 2842 /* lookup ino */
2824 inode = ceph_find_inode(sb, vino); 2843 inode = ceph_find_inode(sb, vino);
2825 ci = ceph_inode(inode); 2844 ci = ceph_inode(inode);
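In the ceph_check_caps() hunks above, a non-auth cap's revocation is now judged against cap_used, i.e. the used bits minus whatever the auth cap has issued, so use that is fully covered by the auth MDS no longer blocks acknowledging a revoke from another MDS. A small self-contained sketch of that bitmask test, with made-up cap bit values standing in for the real CEPH_CAP_* layout:

#include <stdio.h>

/* illustrative cap bits only; the real CEPH_CAP_* encoding is richer */
#define CAP_FILE_RD 0x1
#define CAP_FILE_WR 0x2

int main(void)
{
    unsigned used        = CAP_FILE_RD | CAP_FILE_WR; /* caps the inode currently uses */
    unsigned auth_issued = CAP_FILE_WR;                /* issued by the auth MDS */
    unsigned revoking    = CAP_FILE_WR;                /* being revoked by a non-auth MDS */

    unsigned cap_used = used & ~auth_issued;            /* ignore use backed by the auth cap */

    printf("old test, revocation complete: %s\n", (revoking & used) == 0 ? "yes" : "no");
    printf("new test, revocation complete: %s\n", (revoking & cap_used) == 0 ? "yes" : "no");
    return 0;
}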
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8c1aabe93b67..6d797f46d772 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -238,7 +238,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
238static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) 238static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
239{ 239{
240 struct ceph_file_info *fi = filp->private_data; 240 struct ceph_file_info *fi = filp->private_data;
241 struct inode *inode = filp->f_dentry->d_inode; 241 struct inode *inode = file_inode(filp);
242 struct ceph_inode_info *ci = ceph_inode(inode); 242 struct ceph_inode_info *ci = ceph_inode(inode);
243 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 243 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
244 struct ceph_mds_client *mdsc = fsc->mdsc; 244 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1138,7 +1138,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1138 loff_t *ppos) 1138 loff_t *ppos)
1139{ 1139{
1140 struct ceph_file_info *cf = file->private_data; 1140 struct ceph_file_info *cf = file->private_data;
1141 struct inode *inode = file->f_dentry->d_inode; 1141 struct inode *inode = file_inode(file);
1142 struct ceph_inode_info *ci = ceph_inode(inode); 1142 struct ceph_inode_info *ci = ceph_inode(inode);
1143 int left; 1143 int left;
1144 const int bufsize = 1024; 1144 const int bufsize = 1024;
@@ -1188,7 +1188,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1188static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, 1188static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1189 int datasync) 1189 int datasync)
1190{ 1190{
1191 struct inode *inode = file->f_path.dentry->d_inode; 1191 struct inode *inode = file_inode(file);
1192 struct ceph_inode_info *ci = ceph_inode(inode); 1192 struct ceph_inode_info *ci = ceph_inode(inode);
1193 struct list_head *head = &ci->i_unsafe_dirops; 1193 struct list_head *head = &ci->i_unsafe_dirops;
1194 struct ceph_mds_request *req; 1194 struct ceph_mds_request *req;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index ca3ab3f9ca70..16796be53ca5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -81,7 +81,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
81 if (parent_inode) { 81 if (parent_inode) {
82 /* nfsd wants connectable */ 82 /* nfsd wants connectable */
83 *max_len = connected_handle_length; 83 *max_len = connected_handle_length;
84 type = 255; 84 type = FILEID_INVALID;
85 } else { 85 } else {
86 dout("encode_fh %p\n", dentry); 86 dout("encode_fh %p\n", dentry);
87 fh->ino = ceph_ino(inode); 87 fh->ino = ceph_ino(inode);
@@ -90,7 +90,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
90 } 90 }
91 } else { 91 } else {
92 *max_len = handle_length; 92 *max_len = handle_length;
93 type = 255; 93 type = FILEID_INVALID;
94 } 94 }
95 if (dentry) 95 if (dentry)
96 dput(dentry); 96 dput(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a3..bf338d9b67e3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
243 err = ceph_mdsc_do_request(mdsc, 243 err = ceph_mdsc_do_request(mdsc,
244 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, 244 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
245 req); 245 req);
246 if (err)
247 goto out_err;
248
246 err = ceph_handle_snapdir(req, dentry, err); 249 err = ceph_handle_snapdir(req, dentry, err);
247 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 250 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
248 err = ceph_handle_notrace_create(dir, dentry); 251 err = ceph_handle_notrace_create(dir, dentry);
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
263 err = finish_no_open(file, dn); 266 err = finish_no_open(file, dn);
264 } else { 267 } else {
265 dout("atomic_open finish_open on dn %p\n", dn); 268 dout("atomic_open finish_open on dn %p\n", dn);
269 if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
270 *opened |= FILE_CREATED;
271 }
266 err = finish_open(file, dentry, ceph_open, opened); 272 err = finish_open(file, dentry, ceph_open, opened);
267 } 273 }
268 274
@@ -393,7 +399,7 @@ more:
393static ssize_t ceph_sync_read(struct file *file, char __user *data, 399static ssize_t ceph_sync_read(struct file *file, char __user *data,
394 unsigned len, loff_t *poff, int *checkeof) 400 unsigned len, loff_t *poff, int *checkeof)
395{ 401{
396 struct inode *inode = file->f_dentry->d_inode; 402 struct inode *inode = file_inode(file);
397 struct page **pages; 403 struct page **pages;
398 u64 off = *poff; 404 u64 off = *poff;
399 int num_pages, ret; 405 int num_pages, ret;
@@ -466,7 +472,7 @@ static void sync_write_commit(struct ceph_osd_request *req,
466static ssize_t ceph_sync_write(struct file *file, const char __user *data, 472static ssize_t ceph_sync_write(struct file *file, const char __user *data,
467 size_t left, loff_t *offset) 473 size_t left, loff_t *offset)
468{ 474{
469 struct inode *inode = file->f_dentry->d_inode; 475 struct inode *inode = file_inode(file);
470 struct ceph_inode_info *ci = ceph_inode(inode); 476 struct ceph_inode_info *ci = ceph_inode(inode);
471 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 477 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
472 struct ceph_osd_request *req; 478 struct ceph_osd_request *req;
@@ -483,7 +489,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
483 int ret; 489 int ret;
484 struct timespec mtime = CURRENT_TIME; 490 struct timespec mtime = CURRENT_TIME;
485 491
486 if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP) 492 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
487 return -EROFS; 493 return -EROFS;
488 494
489 dout("sync_write on file %p %lld~%u %s\n", file, *offset, 495 dout("sync_write on file %p %lld~%u %s\n", file, *offset,
@@ -535,7 +541,7 @@ more:
535 ci->i_snap_realm->cached_context, 541 ci->i_snap_realm->cached_context,
536 do_sync, 542 do_sync,
537 ci->i_truncate_seq, ci->i_truncate_size, 543 ci->i_truncate_seq, ci->i_truncate_size,
538 &mtime, false, 2, page_align); 544 &mtime, false, page_align);
539 if (IS_ERR(req)) 545 if (IS_ERR(req))
540 return PTR_ERR(req); 546 return PTR_ERR(req);
541 547
@@ -637,7 +643,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
637 struct ceph_file_info *fi = filp->private_data; 643 struct ceph_file_info *fi = filp->private_data;
638 loff_t *ppos = &iocb->ki_pos; 644 loff_t *ppos = &iocb->ki_pos;
639 size_t len = iov->iov_len; 645 size_t len = iov->iov_len;
640 struct inode *inode = filp->f_dentry->d_inode; 646 struct inode *inode = file_inode(filp);
641 struct ceph_inode_info *ci = ceph_inode(inode); 647 struct ceph_inode_info *ci = ceph_inode(inode);
642 void __user *base = iov->iov_base; 648 void __user *base = iov->iov_base;
643 ssize_t ret; 649 ssize_t ret;
@@ -707,7 +713,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
707{ 713{
708 struct file *file = iocb->ki_filp; 714 struct file *file = iocb->ki_filp;
709 struct ceph_file_info *fi = file->private_data; 715 struct ceph_file_info *fi = file->private_data;
710 struct inode *inode = file->f_dentry->d_inode; 716 struct inode *inode = file_inode(file);
711 struct ceph_inode_info *ci = ceph_inode(inode); 717 struct ceph_inode_info *ci = ceph_inode(inode);
712 struct ceph_osd_client *osdc = 718 struct ceph_osd_client *osdc =
713 &ceph_sb_to_client(inode->i_sb)->client->osdc; 719 &ceph_sb_to_client(inode->i_sb)->client->osdc;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2971eaa65cdc..851814d951cd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -612,10 +612,11 @@ static int fill_inode(struct inode *inode,
612 612
613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
614 inode->i_mode = le32_to_cpu(info->mode); 614 inode->i_mode = le32_to_cpu(info->mode);
615 inode->i_uid = le32_to_cpu(info->uid); 615 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
616 inode->i_gid = le32_to_cpu(info->gid); 616 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
618 inode->i_uid, inode->i_gid); 618 from_kuid(&init_user_ns, inode->i_uid),
619 from_kgid(&init_user_ns, inode->i_gid));
619 } 620 }
620 621
621 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 622 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -1130,8 +1131,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1130 req->r_request_started); 1131 req->r_request_started);
1131 dout(" final dn %p\n", dn); 1132 dout(" final dn %p\n", dn);
1132 i++; 1133 i++;
1133 } else if (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 1134 } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
1134 req->r_op == CEPH_MDS_OP_MKSNAP) { 1135 req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
1135 struct dentry *dn = req->r_dentry; 1136 struct dentry *dn = req->r_dentry;
1136 1137
1137 /* fill out a snapdir LOOKUPSNAP dentry */ 1138 /* fill out a snapdir LOOKUPSNAP dentry */
@@ -1195,6 +1196,39 @@ done:
1195/* 1196/*
1196 * Prepopulate our cache with readdir results, leases, etc. 1197 * Prepopulate our cache with readdir results, leases, etc.
1197 */ 1198 */
1199static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
1200 struct ceph_mds_session *session)
1201{
1202 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
1203 int i, err = 0;
1204
1205 for (i = 0; i < rinfo->dir_nr; i++) {
1206 struct ceph_vino vino;
1207 struct inode *in;
1208 int rc;
1209
1210 vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
1211 vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
1212
1213 in = ceph_get_inode(req->r_dentry->d_sb, vino);
1214 if (IS_ERR(in)) {
1215 err = PTR_ERR(in);
1216 dout("new_inode badness got %d\n", err);
1217 continue;
1218 }
1219 rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
1220 req->r_request_started, -1,
1221 &req->r_caps_reservation);
1222 if (rc < 0) {
1223 pr_err("fill_inode badness on %p got %d\n", in, rc);
1224 err = rc;
1225 continue;
1226 }
1227 }
1228
1229 return err;
1230}
1231
1198int ceph_readdir_prepopulate(struct ceph_mds_request *req, 1232int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1199 struct ceph_mds_session *session) 1233 struct ceph_mds_session *session)
1200{ 1234{
@@ -1209,6 +1243,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1209 u64 frag = le32_to_cpu(rhead->args.readdir.frag); 1243 u64 frag = le32_to_cpu(rhead->args.readdir.frag);
1210 struct ceph_dentry_info *di; 1244 struct ceph_dentry_info *di;
1211 1245
1246 if (req->r_aborted)
1247 return readdir_prepopulate_inodes_only(req, session);
1248
1212 if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { 1249 if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
1213 snapdir = ceph_get_snapdir(parent->d_inode); 1250 snapdir = ceph_get_snapdir(parent->d_inode);
1214 parent = d_find_alias(snapdir); 1251 parent = d_find_alias(snapdir);
@@ -1565,26 +1602,30 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1565 1602
1566 if (ia_valid & ATTR_UID) { 1603 if (ia_valid & ATTR_UID) {
1567 dout("setattr %p uid %d -> %d\n", inode, 1604 dout("setattr %p uid %d -> %d\n", inode,
1568 inode->i_uid, attr->ia_uid); 1605 from_kuid(&init_user_ns, inode->i_uid),
1606 from_kuid(&init_user_ns, attr->ia_uid));
1569 if (issued & CEPH_CAP_AUTH_EXCL) { 1607 if (issued & CEPH_CAP_AUTH_EXCL) {
1570 inode->i_uid = attr->ia_uid; 1608 inode->i_uid = attr->ia_uid;
1571 dirtied |= CEPH_CAP_AUTH_EXCL; 1609 dirtied |= CEPH_CAP_AUTH_EXCL;
1572 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1610 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1573 attr->ia_uid != inode->i_uid) { 1611 !uid_eq(attr->ia_uid, inode->i_uid)) {
1574 req->r_args.setattr.uid = cpu_to_le32(attr->ia_uid); 1612 req->r_args.setattr.uid = cpu_to_le32(
1613 from_kuid(&init_user_ns, attr->ia_uid));
1575 mask |= CEPH_SETATTR_UID; 1614 mask |= CEPH_SETATTR_UID;
1576 release |= CEPH_CAP_AUTH_SHARED; 1615 release |= CEPH_CAP_AUTH_SHARED;
1577 } 1616 }
1578 } 1617 }
1579 if (ia_valid & ATTR_GID) { 1618 if (ia_valid & ATTR_GID) {
1580 dout("setattr %p gid %d -> %d\n", inode, 1619 dout("setattr %p gid %d -> %d\n", inode,
1581 inode->i_gid, attr->ia_gid); 1620 from_kgid(&init_user_ns, inode->i_gid),
1621 from_kgid(&init_user_ns, attr->ia_gid));
1582 if (issued & CEPH_CAP_AUTH_EXCL) { 1622 if (issued & CEPH_CAP_AUTH_EXCL) {
1583 inode->i_gid = attr->ia_gid; 1623 inode->i_gid = attr->ia_gid;
1584 dirtied |= CEPH_CAP_AUTH_EXCL; 1624 dirtied |= CEPH_CAP_AUTH_EXCL;
1585 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1625 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1586 attr->ia_gid != inode->i_gid) { 1626 !gid_eq(attr->ia_gid, inode->i_gid)) {
1587 req->r_args.setattr.gid = cpu_to_le32(attr->ia_gid); 1627 req->r_args.setattr.gid = cpu_to_le32(
1628 from_kgid(&init_user_ns, attr->ia_gid));
1588 mask |= CEPH_SETATTR_GID; 1629 mask |= CEPH_SETATTR_GID;
1589 release |= CEPH_CAP_AUTH_SHARED; 1630 release |= CEPH_CAP_AUTH_SHARED;
1590 } 1631 }
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 36549a46e311..4a989345b37b 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -16,11 +16,11 @@
16 */ 16 */
17static long ceph_ioctl_get_layout(struct file *file, void __user *arg) 17static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
18{ 18{
19 struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); 19 struct ceph_inode_info *ci = ceph_inode(file_inode(file));
20 struct ceph_ioctl_layout l; 20 struct ceph_ioctl_layout l;
21 int err; 21 int err;
22 22
23 err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); 23 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
24 if (!err) { 24 if (!err) {
25 l.stripe_unit = ceph_file_layout_su(ci->i_layout); 25 l.stripe_unit = ceph_file_layout_su(ci->i_layout);
26 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); 26 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
@@ -63,12 +63,12 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
63 63
64static long ceph_ioctl_set_layout(struct file *file, void __user *arg) 64static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
65{ 65{
66 struct inode *inode = file->f_dentry->d_inode; 66 struct inode *inode = file_inode(file);
67 struct inode *parent_inode; 67 struct inode *parent_inode;
68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
69 struct ceph_mds_request *req; 69 struct ceph_mds_request *req;
70 struct ceph_ioctl_layout l; 70 struct ceph_ioctl_layout l;
71 struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); 71 struct ceph_inode_info *ci = ceph_inode(file_inode(file));
72 struct ceph_ioctl_layout nl; 72 struct ceph_ioctl_layout nl;
73 int err; 73 int err;
74 74
@@ -76,7 +76,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
76 return -EFAULT; 76 return -EFAULT;
77 77
78 /* validate changed params against current layout */ 78 /* validate changed params against current layout */
79 err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); 79 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
80 if (err) 80 if (err)
81 return err; 81 return err;
82 82
@@ -136,7 +136,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
136 */ 136 */
137static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) 137static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
138{ 138{
139 struct inode *inode = file->f_dentry->d_inode; 139 struct inode *inode = file_inode(file);
140 struct ceph_mds_request *req; 140 struct ceph_mds_request *req;
141 struct ceph_ioctl_layout l; 141 struct ceph_ioctl_layout l;
142 int err; 142 int err;
@@ -179,13 +179,12 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
179static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) 179static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
180{ 180{
181 struct ceph_ioctl_dataloc dl; 181 struct ceph_ioctl_dataloc dl;
182 struct inode *inode = file->f_dentry->d_inode; 182 struct inode *inode = file_inode(file);
183 struct ceph_inode_info *ci = ceph_inode(inode); 183 struct ceph_inode_info *ci = ceph_inode(inode);
184 struct ceph_osd_client *osdc = 184 struct ceph_osd_client *osdc =
185 &ceph_sb_to_client(inode->i_sb)->client->osdc; 185 &ceph_sb_to_client(inode->i_sb)->client->osdc;
186 u64 len = 1, olen; 186 u64 len = 1, olen;
187 u64 tmp; 187 u64 tmp;
188 struct ceph_object_layout ol;
189 struct ceph_pg pgid; 188 struct ceph_pg pgid;
190 int r; 189 int r;
191 190
@@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
194 return -EFAULT; 193 return -EFAULT;
195 194
196 down_read(&osdc->map_sem); 195 down_read(&osdc->map_sem);
197 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, 196 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
198 &dl.object_no, &dl.object_offset, 197 &dl.object_no, &dl.object_offset,
199 &olen); 198 &olen);
200 if (r < 0) 199 if (r < 0)
@@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
209 208
210 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", 209 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
211 ceph_ino(inode), dl.object_no); 210 ceph_ino(inode), dl.object_no);
212 ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, 211 ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
213 osdc->osdmap); 212 osdc->osdmap);
214 213
215 pgid = ol.ol_pgid;
216 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); 214 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
217 if (dl.osd >= 0) { 215 if (dl.osd >= 0) {
218 struct ceph_entity_addr *a = 216 struct ceph_entity_addr *a =
@@ -234,7 +232,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
234static long ceph_ioctl_lazyio(struct file *file) 232static long ceph_ioctl_lazyio(struct file *file)
235{ 233{
236 struct ceph_file_info *fi = file->private_data; 234 struct ceph_file_info *fi = file->private_data;
237 struct inode *inode = file->f_dentry->d_inode; 235 struct inode *inode = file_inode(file);
238 struct ceph_inode_info *ci = ceph_inode(inode); 236 struct ceph_inode_info *ci = ceph_inode(inode);
239 237
240 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 238 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 80576d05d687..202dd3d68be0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -13,7 +13,7 @@
13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 int cmd, u8 wait, struct file_lock *fl) 14 int cmd, u8 wait, struct file_lock *fl)
15{ 15{
16 struct inode *inode = file->f_dentry->d_inode; 16 struct inode *inode = file_inode(file);
17 struct ceph_mds_client *mdsc = 17 struct ceph_mds_client *mdsc =
18 ceph_sb_to_client(inode->i_sb)->mdsc; 18 ceph_sb_to_client(inode->i_sb)->mdsc;
19 struct ceph_mds_request *req; 19 struct ceph_mds_request *req;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8309eb..442880d099c9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -233,6 +233,30 @@ bad:
233} 233}
234 234
235/* 235/*
236 * parse create results
237 */
238static int parse_reply_info_create(void **p, void *end,
239 struct ceph_mds_reply_info_parsed *info,
240 int features)
241{
242 if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
243 if (*p == end) {
244 info->has_create_ino = false;
245 } else {
246 info->has_create_ino = true;
247 info->ino = ceph_decode_64(p);
248 }
249 }
250
251 if (unlikely(*p != end))
252 goto bad;
253 return 0;
254
255bad:
256 return -EIO;
257}
258
259/*
236 * parse extra results 260 * parse extra results
237 */ 261 */
238static int parse_reply_info_extra(void **p, void *end, 262static int parse_reply_info_extra(void **p, void *end,
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
241{ 265{
242 if (info->head->op == CEPH_MDS_OP_GETFILELOCK) 266 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
243 return parse_reply_info_filelock(p, end, info, features); 267 return parse_reply_info_filelock(p, end, info, features);
244 else 268 else if (info->head->op == CEPH_MDS_OP_READDIR)
245 return parse_reply_info_dir(p, end, info, features); 269 return parse_reply_info_dir(p, end, info, features);
270 else if (info->head->op == CEPH_MDS_OP_CREATE)
271 return parse_reply_info_create(p, end, info, features);
272 else
273 return -EIO;
246} 274}
247 275
248/* 276/*
@@ -1658,8 +1686,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1658 1686
1659 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1687 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
1660 head->op = cpu_to_le32(req->r_op); 1688 head->op = cpu_to_le32(req->r_op);
1661 head->caller_uid = cpu_to_le32(req->r_uid); 1689 head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
1662 head->caller_gid = cpu_to_le32(req->r_gid); 1690 head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
1663 head->args = req->r_args; 1691 head->args = req->r_args;
1664 1692
1665 ceph_encode_filepath(&p, end, ino1, path1); 1693 ceph_encode_filepath(&p, end, ino1, path1);
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2170 mutex_lock(&req->r_fill_mutex); 2198 mutex_lock(&req->r_fill_mutex);
2171 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2199 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2172 if (err == 0) { 2200 if (err == 0) {
2173 if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && 2201 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
2202 req->r_op == CEPH_MDS_OP_LSSNAP) &&
2174 rinfo->dir_nr) 2203 rinfo->dir_nr)
2175 ceph_readdir_prepopulate(req, req->r_session); 2204 ceph_readdir_prepopulate(req, req->r_session);
2176 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2205 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
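parse_reply_info_create() above decodes an optional trailing field: when the peer advertises CEPH_FEATURE_REPLY_CREATE_INODE, an empty remainder means no created ino, a 64-bit value carries the ino, and any other leftover bytes are an error. A self-contained sketch of that decode pattern (the buffer layout, helper names, and the little-endian-host shortcut are assumptions for illustration, not the ceph_decode_64() wire helpers):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t decode_le64(const unsigned char **p)
{
    uint64_t v;
    memcpy(&v, *p, sizeof(v));   /* assume little-endian host for brevity */
    *p += sizeof(v);
    return v;
}

static int parse_create_reply(const unsigned char *p, const unsigned char *end,
                              int has_feature, uint64_t *ino, int *has_ino)
{
    *has_ino = 0;
    if (has_feature) {
        if (p == end) {
            /* peer supports the feature but sent nothing: no created ino */
        } else if (end - p >= 8) {
            *ino = decode_le64(&p);
            *has_ino = 1;
        }
    }
    if (p != end)
        return -1;               /* trailing garbage -> treat like -EIO */
    return 0;
}

int main(void)
{
    unsigned char buf[8] = { 0x39, 0x05, 0, 0, 0, 0, 0, 0 };  /* ino 0x539 */
    uint64_t ino;
    int has_ino;

    if (parse_create_reply(buf, buf + sizeof(buf), 1, &ino, &has_ino) == 0 && has_ino)
        printf("created ino %#llx\n", (unsigned long long)ino);
    return 0;
}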
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd26846dd71d..c2a19fbbe517 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
74 struct ceph_mds_reply_info_in *dir_in; 74 struct ceph_mds_reply_info_in *dir_in;
75 u8 dir_complete, dir_end; 75 u8 dir_complete, dir_end;
76 }; 76 };
77
78 /* for create results */
79 struct {
80 bool has_create_ino;
81 u64 ino;
82 };
77 }; 83 };
78 84
79 /* encoded blob describing snapshot contexts for certain 85 /* encoded blob describing snapshot contexts for certain
@@ -184,8 +190,8 @@ struct ceph_mds_request {
184 190
185 union ceph_mds_request_args r_args; 191 union ceph_mds_request_args r_args;
186 int r_fmode; /* file mode, if expecting cap */ 192 int r_fmode; /* file mode, if expecting cap */
187 uid_t r_uid; 193 kuid_t r_uid;
188 gid_t r_gid; 194 kgid_t r_gid;
189 195
190 /* for choosing which mds to send this request to */ 196 /* for choosing which mds to send this request to */
191 int r_direct_mode; 197 int r_direct_mode;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 73b7d44e8a35..0d3c9240c61b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
59 return ERR_PTR(-ENOMEM); 59 return ERR_PTR(-ENOMEM);
60 60
61 ceph_decode_16_safe(p, end, version, bad); 61 ceph_decode_16_safe(p, end, version, bad);
62 if (version > 3) {
63 pr_warning("got mdsmap version %d > 3, failing", version);
64 goto bad;
65 }
62 66
63 ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 67 ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
64 m->m_epoch = ceph_decode_32(p); 68 m->m_epoch = ceph_decode_32(p);
@@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
144 /* pg_pools */ 148 /* pg_pools */
145 ceph_decode_32_safe(p, end, n, bad); 149 ceph_decode_32_safe(p, end, n, bad);
146 m->m_num_data_pg_pools = n; 150 m->m_num_data_pg_pools = n;
147 m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); 151 m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
148 if (!m->m_data_pg_pools) 152 if (!m->m_data_pg_pools)
149 goto badmem; 153 goto badmem;
150 ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); 154 ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
151 for (i = 0; i < n; i++) 155 for (i = 0; i < n; i++)
152 m->m_data_pg_pools[i] = ceph_decode_32(p); 156 m->m_data_pg_pools[i] = ceph_decode_64(p);
153 m->m_cas_pg_pool = ceph_decode_32(p); 157 m->m_cas_pg_pool = ceph_decode_64(p);
154 158
155 /* ok, we don't care about the rest. */ 159 /* ok, we don't care about the rest. */
156 dout("mdsmap_decode success epoch %u\n", m->m_epoch); 160 dout("mdsmap_decode success epoch %u\n", m->m_epoch);
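The mdsmap hunk widens the data pg pool ids (and m_cas_pg_pool) from 32 to 64 bits, so both the allocation and the ceph_decode_need() length check switch from sizeof(u32) to sizeof(u64) for the n pools plus the CAS pool. The bounds arithmetic, with an illustrative pool count:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t n = 12;                                  /* number of data pools, for illustration */

    size_t need_old = sizeof(uint32_t) * (n + 1);     /* 32-bit pool ids: 52 bytes */
    size_t need_new = sizeof(uint64_t) * (n + 1);     /* 64-bit pool ids: 104 bytes */

    printf("bytes needed, u32 ids: %zu\n", need_old);
    printf("bytes needed, u64 ids: %zu\n", need_new);
    return 0;
}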
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index cd5097d7c804..89fa4a940a0f 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
15 case CEPH_MDS_STATE_BOOT: return "up:boot"; 15 case CEPH_MDS_STATE_BOOT: return "up:boot";
16 case CEPH_MDS_STATE_STANDBY: return "up:standby"; 16 case CEPH_MDS_STATE_STANDBY: return "up:standby";
17 case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; 17 case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay";
18 case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
18 case CEPH_MDS_STATE_CREATING: return "up:creating"; 19 case CEPH_MDS_STATE_CREATING: return "up:creating";
19 case CEPH_MDS_STATE_STARTING: return "up:starting"; 20 case CEPH_MDS_STATE_STARTING: return "up:starting";
20 /* up and in */ 21 /* up and in */
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
50 case CEPH_MDS_OP_LOOKUP: return "lookup"; 51 case CEPH_MDS_OP_LOOKUP: return "lookup";
51 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; 52 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
52 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; 53 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
54 case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
53 case CEPH_MDS_OP_GETATTR: return "getattr"; 55 case CEPH_MDS_OP_GETATTR: return "getattr";
54 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 56 case CEPH_MDS_OP_SETXATTR: return "setxattr";
55 case CEPH_MDS_OP_SETATTR: return "setattr"; 57 case CEPH_MDS_OP_SETATTR: return "setattr";
56 case CEPH_MDS_OP_RMXATTR: return "rmxattr"; 58 case CEPH_MDS_OP_RMXATTR: return "rmxattr";
 59 case CEPH_MDS_OP_SETLAYOUT: return "setlayout";
60 case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
57 case CEPH_MDS_OP_READDIR: return "readdir"; 61 case CEPH_MDS_OP_READDIR: return "readdir";
58 case CEPH_MDS_OP_MKNOD: return "mknod"; 62 case CEPH_MDS_OP_MKNOD: return "mknod";
59 case CEPH_MDS_OP_LINK: return "link"; 63 case CEPH_MDS_OP_LINK: return "link";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e86aa9948124..9fe17c6c2876 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
71 /* 71 /*
72 * express utilization in terms of large blocks to avoid 72 * express utilization in terms of large blocks to avoid
73 * overflow on 32-bit machines. 73 * overflow on 32-bit machines.
74 *
75 * NOTE: for the time being, we make bsize == frsize to humor
76 * not-yet-ancient versions of glibc that are broken.
77 * Someday, we will probably want to report a real block
78 * size... whatever that may mean for a network file system!
74 */ 79 */
75 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 80 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
81 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
76 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 82 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
77 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 83 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
78 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 84 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
@@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
80 buf->f_files = le64_to_cpu(st.num_objects); 86 buf->f_files = le64_to_cpu(st.num_objects);
81 buf->f_ffree = -1; 87 buf->f_ffree = -1;
82 buf->f_namelen = NAME_MAX; 88 buf->f_namelen = NAME_MAX;
83 buf->f_frsize = PAGE_CACHE_SIZE;
84 89
85 /* leave fsid little-endian, regardless of host endianness */ 90 /* leave fsid little-endian, regardless of host endianness */
86 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); 91 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 66ebe720e40d..c7b309723dcc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -21,7 +21,7 @@
21 21
22/* large granularity for statfs utilization stats to facilitate 22/* large granularity for statfs utilization stats to facilitate
23 * large volume sizes on 32-bit machines. */ 23 * large volume sizes on 32-bit machines. */
24#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ 24#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
25#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) 25#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
26 26
27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ 27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -138,8 +138,8 @@ struct ceph_cap_snap {
138 struct ceph_snap_context *context; 138 struct ceph_snap_context *context;
139 139
140 umode_t mode; 140 umode_t mode;
141 uid_t uid; 141 kuid_t uid;
142 gid_t gid; 142 kgid_t gid;
143 143
144 struct ceph_buffer *xattr_blob; 144 struct ceph_buffer *xattr_blob;
145 u64 xattr_version; 145 u64 xattr_version;
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
798/* file.c */ 798/* file.c */
799extern const struct file_operations ceph_file_fops; 799extern const struct file_operations ceph_file_fops;
800extern const struct address_space_operations ceph_aops; 800extern const struct address_space_operations ceph_aops;
801extern int ceph_copy_to_page_vector(struct page **pages, 801
802 const char *data,
803 loff_t off, size_t len);
804extern int ceph_copy_from_page_vector(struct page **pages,
805 char *data,
806 loff_t off, size_t len);
807extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
808extern int ceph_open(struct inode *inode, struct file *file); 802extern int ceph_open(struct inode *inode, struct file *file);
809extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 803extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
810 struct file *file, unsigned flags, umode_t mode, 804 struct file *file, unsigned flags, umode_t mode,
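Taken together, the super.c and super.h changes report statfs utilization in 4 MB units (CEPH_BLOCK_SHIFT raised from 20 to 22) and make f_frsize match f_bsize rather than PAGE_CACHE_SIZE, since some glibc versions derive sizes from f_frsize. A quick check of the shift arithmetic used in ceph_statfs(), with an illustrative kb value from the monitors:

#include <stdio.h>
#include <stdint.h>

#define CEPH_BLOCK_SHIFT 22          /* 4 MB blocks, as in the new super.h */

int main(void)
{
    uint64_t kb = 10ULL << 30;       /* 10 * 2^30 KiB = 10 TiB reported by the monitors */

    /* each KiB is 2^10 bytes and each block is 2^CEPH_BLOCK_SHIFT bytes,
     * so converting KiB to blocks shifts right by CEPH_BLOCK_SHIFT - 10 */
    uint64_t blocks = kb >> (CEPH_BLOCK_SHIFT - 10);

    printf("f_bsize = f_frsize = %u bytes\n", 1u << CEPH_BLOCK_SHIFT);
    printf("f_blocks = %llu (10 TiB in 4 MiB units)\n", (unsigned long long)blocks);
    return 0;
}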
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 2c2ae5be9902..9b6b2b6dd164 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -29,9 +29,94 @@ struct ceph_vxattr {
29 size_t name_size; /* strlen(name) + 1 (for '\0') */ 29 size_t name_size; /* strlen(name) + 1 (for '\0') */
30 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 30 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
31 size_t size); 31 size_t size);
32 bool readonly; 32 bool readonly, hidden;
33 bool (*exists_cb)(struct ceph_inode_info *ci);
33}; 34};
34 35
36/* layouts */
37
38static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
39{
40 size_t s;
41 char *p = (char *)&ci->i_layout;
42
43 for (s = 0; s < sizeof(ci->i_layout); s++, p++)
44 if (*p)
45 return true;
46 return false;
47}
48
49static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
50 size_t size)
51{
52 int ret;
53 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
54 struct ceph_osd_client *osdc = &fsc->client->osdc;
55 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
56 const char *pool_name;
57
58 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
59 down_read(&osdc->map_sem);
60 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
61 if (pool_name)
62 ret = snprintf(val, size,
63 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
64 (unsigned long long)ceph_file_layout_su(ci->i_layout),
65 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
66 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
67 pool_name);
68 else
69 ret = snprintf(val, size,
70 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
71 (unsigned long long)ceph_file_layout_su(ci->i_layout),
72 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
73 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
74 (unsigned long long)pool);
75
76 up_read(&osdc->map_sem);
77 return ret;
78}
79
80static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
81 char *val, size_t size)
82{
83 return snprintf(val, size, "%lld",
84 (unsigned long long)ceph_file_layout_su(ci->i_layout));
85}
86
87static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
88 char *val, size_t size)
89{
90 return snprintf(val, size, "%lld",
91 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
92}
93
94static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
95 char *val, size_t size)
96{
97 return snprintf(val, size, "%lld",
98 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
99}
100
101static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
102 char *val, size_t size)
103{
104 int ret;
105 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
106 struct ceph_osd_client *osdc = &fsc->client->osdc;
107 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
108 const char *pool_name;
109
110 down_read(&osdc->map_sem);
111 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
112 if (pool_name)
113 ret = snprintf(val, size, "%s", pool_name);
114 else
115 ret = snprintf(val, size, "%lld", (unsigned long long)pool);
116 up_read(&osdc->map_sem);
117 return ret;
118}
119
35/* directories */ 120/* directories */
36 121
37static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, 122static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
83 (long)ci->i_rctime.tv_nsec); 168 (long)ci->i_rctime.tv_nsec);
84} 169}
85 170
86#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
87 171
88#define XATTR_NAME_CEPH(_type, _name) \ 172#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
89 { \ 173#define CEPH_XATTR_NAME2(_type, _name, _name2) \
90 .name = CEPH_XATTR_NAME(_type, _name), \ 174 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
91 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ 175
92 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ 176#define XATTR_NAME_CEPH(_type, _name) \
93 .readonly = true, \ 177 { \
94 } 178 .name = CEPH_XATTR_NAME(_type, _name), \
179 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
180 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
181 .readonly = true, \
182 .hidden = false, \
183 .exists_cb = NULL, \
184 }
185#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
186 { \
187 .name = CEPH_XATTR_NAME2(_type, _name, _field), \
188 .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
189 .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
190 .readonly = false, \
191 .hidden = true, \
192 .exists_cb = ceph_vxattrcb_layout_exists, \
193 }
95 194
96static struct ceph_vxattr ceph_dir_vxattrs[] = { 195static struct ceph_vxattr ceph_dir_vxattrs[] = {
196 {
197 .name = "ceph.dir.layout",
198 .name_size = sizeof("ceph.dir.layout"),
199 .getxattr_cb = ceph_vxattrcb_layout,
200 .readonly = false,
201 .hidden = false,
202 .exists_cb = ceph_vxattrcb_layout_exists,
203 },
204 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
205 XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
206 XATTR_LAYOUT_FIELD(dir, layout, object_size),
207 XATTR_LAYOUT_FIELD(dir, layout, pool),
97 XATTR_NAME_CEPH(dir, entries), 208 XATTR_NAME_CEPH(dir, entries),
98 XATTR_NAME_CEPH(dir, files), 209 XATTR_NAME_CEPH(dir, files),
99 XATTR_NAME_CEPH(dir, subdirs), 210 XATTR_NAME_CEPH(dir, subdirs),
@@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
102 XATTR_NAME_CEPH(dir, rsubdirs), 213 XATTR_NAME_CEPH(dir, rsubdirs),
103 XATTR_NAME_CEPH(dir, rbytes), 214 XATTR_NAME_CEPH(dir, rbytes),
104 XATTR_NAME_CEPH(dir, rctime), 215 XATTR_NAME_CEPH(dir, rctime),
105 { 0 } /* Required table terminator */ 216 { .name = NULL, 0 } /* Required table terminator */
106}; 217};
107static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 218static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
108 219
109/* files */ 220/* files */
110 221
111static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
112 size_t size)
113{
114 int ret;
115
116 ret = snprintf(val, size,
117 "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
118 (unsigned long long)ceph_file_layout_su(ci->i_layout),
119 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
120 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
121 return ret;
122}
123
124static struct ceph_vxattr ceph_file_vxattrs[] = { 222static struct ceph_vxattr ceph_file_vxattrs[] = {
125 XATTR_NAME_CEPH(file, layout),
126 /* The following extended attribute name is deprecated */
127 { 223 {
128 .name = XATTR_CEPH_PREFIX "layout", 224 .name = "ceph.file.layout",
129 .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), 225 .name_size = sizeof("ceph.file.layout"),
130 .getxattr_cb = ceph_vxattrcb_file_layout, 226 .getxattr_cb = ceph_vxattrcb_layout,
131 .readonly = true, 227 .readonly = false,
228 .hidden = false,
229 .exists_cb = ceph_vxattrcb_layout_exists,
132 }, 230 },
133 { 0 } /* Required table terminator */ 231 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
232 XATTR_LAYOUT_FIELD(file, layout, stripe_count),
233 XATTR_LAYOUT_FIELD(file, layout, object_size),
234 XATTR_LAYOUT_FIELD(file, layout, pool),
235 { .name = NULL, 0 } /* Required table terminator */
134}; 236};
135static size_t ceph_file_vxattrs_name_size; /* total size of all names */ 237static size_t ceph_file_vxattrs_name_size; /* total size of all names */
136 238
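The CEPH_XATTR_NAME2() / XATTR_LAYOUT_FIELD() macros introduced above build the per-field xattr names ("ceph.dir.layout.stripe_unit" and friends) entirely at compile time, and the tables store sizeof of the resulting literal, so name_size includes the trailing NUL. A self-contained sketch of that construction; only the prefix value and the macro body are taken from the patch, the rest is scaffolding.

#include <stdio.h>

#define XATTR_CEPH_PREFIX "ceph."

/* Token stringification plus adjacent string-literal concatenation,
 * exactly the trick used by CEPH_XATTR_NAME2() in the hunk above. */
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

int main(void)
{
	/* sizeof on the literal counts the terminating NUL, which is why
	 * the .name_size fields are strlen(name) + 1. */
	printf("%s -> %zu bytes including the NUL\n",
	       CEPH_XATTR_NAME2(dir, layout, stripe_unit),
	       sizeof(CEPH_XATTR_NAME2(dir, layout, stripe_unit)));
	return 0;
}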
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
164 size_t size = 0; 266 size_t size = 0;
165 267
166 for (vxattr = vxattrs; vxattr->name; vxattr++) 268 for (vxattr = vxattrs; vxattr->name; vxattr++)
167 size += vxattr->name_size; 269 if (!vxattr->hidden)
270 size += vxattr->name_size;
168 271
169 return size; 272 return size;
170} 273}
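vxattrs_name_size() now skips hidden entries, so names such as the ceph.*.layout.* fields no longer inflate the buffer size reported to listxattr. A standalone walk over a NULL-terminated table with the same rule; struct vxattr here is a trimmed-down stand-in for the kernel structure.

#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

struct vxattr {
	const char *name;	/* NULL terminates the table */
	size_t name_size;	/* strlen(name) + 1 */
	bool hidden;		/* not reported by listxattr */
};

static size_t visible_names_size(const struct vxattr *v)
{
	size_t size = 0;

	for (; v->name; v++)
		if (!v->hidden)
			size += v->name_size;
	return size;
}

int main(void)
{
	const struct vxattr table[] = {
		{ "ceph.dir.layout", sizeof("ceph.dir.layout"), false },
		{ "ceph.dir.layout.pool", sizeof("ceph.dir.layout.pool"), true },
		{ "ceph.dir.entries", sizeof("ceph.dir.entries"), false },
		{ NULL, 0, false },
	};

	printf("%zu bytes of visible names\n", visible_names_size(table));
	return 0;
}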
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
572 if (!ceph_is_valid_xattr(name)) 675 if (!ceph_is_valid_xattr(name))
573 return -ENODATA; 676 return -ENODATA;
574 677
575 /* let's see if a virtual xattr was requested */
576 vxattr = ceph_match_vxattr(inode, name);
577
578 spin_lock(&ci->i_ceph_lock); 678 spin_lock(&ci->i_ceph_lock);
579 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 679 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
580 ci->i_xattrs.version, ci->i_xattrs.index_version); 680 ci->i_xattrs.version, ci->i_xattrs.index_version);
581 681
682 /* let's see if a virtual xattr was requested */
683 vxattr = ceph_match_vxattr(inode, name);
684 if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
685 err = vxattr->getxattr_cb(ci, value, size);
686 goto out;
687 }
688
582 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && 689 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
583 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 690 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
584 goto get_xattr; 691 goto get_xattr;
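The getxattr change moves the vxattr lookup under i_ceph_lock and serves a matched virtual xattr unless its exists_cb says the attribute is absent on this inode; the old readonly and fallback paths further down are then removed. The double negation in that test is easy to misread, so here is the same predicate in isolation; vxattr_visible() and the callback names are illustrative.

#include <stdio.h>
#include <stdbool.h>

struct vxattr_like {
	bool (*exists_cb)(const void *obj);	/* NULL means "always exists" */
};

/* Mirrors the ceph_getxattr() condition: a matched virtual xattr is
 * answered from its callback unless it has an exists_cb that reports
 * the attribute as missing for this particular inode. */
static bool vxattr_visible(const struct vxattr_like *vx, const void *obj)
{
	return !(vx->exists_cb && !vx->exists_cb(obj));
}

static bool never_exists(const void *obj) { (void)obj; return false; }

int main(void)
{
	struct vxattr_like plain = { NULL };
	struct vxattr_like gated = { never_exists };

	printf("%d %d\n", vxattr_visible(&plain, NULL),
	       vxattr_visible(&gated, NULL));	/* prints: 1 0 */
	return 0;
}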
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
592 699
593 spin_lock(&ci->i_ceph_lock); 700 spin_lock(&ci->i_ceph_lock);
594 701
595 if (vxattr && vxattr->readonly) {
596 err = vxattr->getxattr_cb(ci, value, size);
597 goto out;
598 }
599
600 err = __build_xattrs(inode); 702 err = __build_xattrs(inode);
601 if (err < 0) 703 if (err < 0)
602 goto out; 704 goto out;
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
604get_xattr: 706get_xattr:
605 err = -ENODATA; /* == ENOATTR */ 707 err = -ENODATA; /* == ENOATTR */
606 xattr = __get_xattr(ci, name); 708 xattr = __get_xattr(ci, name);
607 if (!xattr) { 709 if (!xattr)
608 if (vxattr)
609 err = vxattr->getxattr_cb(ci, value, size);
610 goto out; 710 goto out;
611 }
612 711
613 err = -ERANGE; 712 err = -ERANGE;
614 if (size && size < xattr->val_len) 713 if (size && size < xattr->val_len)
@@ -664,23 +763,30 @@ list_xattr:
664 vir_namelen = ceph_vxattrs_name_size(vxattrs); 763 vir_namelen = ceph_vxattrs_name_size(vxattrs);
665 764
666 /* adding 1 byte per each variable due to the null termination */ 765 /* adding 1 byte per each variable due to the null termination */
667 namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; 766 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
668 err = -ERANGE; 767 err = -ERANGE;
669 if (size && namelen > size) 768 if (size && vir_namelen + namelen > size)
670 goto out; 769 goto out;
671 770
672 err = namelen; 771 err = namelen + vir_namelen;
673 if (size == 0) 772 if (size == 0)
674 goto out; 773 goto out;
675 774
676 names = __copy_xattr_names(ci, names); 775 names = __copy_xattr_names(ci, names);
677 776
678 /* virtual xattr names, too */ 777 /* virtual xattr names, too */
679 if (vxattrs) 778 err = namelen;
779 if (vxattrs) {
680 for (i = 0; vxattrs[i].name; i++) { 780 for (i = 0; vxattrs[i].name; i++) {
681 len = sprintf(names, "%s", vxattrs[i].name); 781 if (!vxattrs[i].hidden &&
682 names += len + 1; 782 !(vxattrs[i].exists_cb &&
783 !vxattrs[i].exists_cb(ci))) {
784 len = sprintf(names, "%s", vxattrs[i].name);
785 names += len + 1;
786 err += len + 1;
787 }
683 } 788 }
789 }
684 790
685out: 791out:
686 spin_unlock(&ci->i_ceph_lock); 792 spin_unlock(&ci->i_ceph_lock);
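The listxattr hunk keeps the real-xattr length and the virtual-name length separate: the -ERANGE check uses their sum, virtual names are appended only when visible and existing, and err accumulates the bytes actually emitted. The packing convention itself, names back to back with their NULs and a size==0 probe returning the required length, looks like this in plain C; pack_names() and the -1 error code are stand-ins.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static ssize_t pack_names(char *buf, size_t size,
			  const char *const *names, size_t n)
{
	size_t need = 0;

	for (size_t i = 0; i < n; i++)
		need += strlen(names[i]) + 1;	/* name plus its NUL */
	if (size == 0)
		return need;			/* length-probe mode */
	if (need > size)
		return -1;			/* stands in for -ERANGE */
	for (size_t i = 0; i < n; i++) {
		size_t len = strlen(names[i]) + 1;

		memcpy(buf, names[i], len);
		buf += len;
	}
	return need;
}

int main(void)
{
	const char *names[] = { "ceph.dir.layout", "ceph.dir.entries" };
	char buf[64];

	printf("%zd bytes packed\n", pack_names(buf, sizeof(buf), names, 2));
	return 0;
}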
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
782 if (vxattr && vxattr->readonly) 888 if (vxattr && vxattr->readonly)
783 return -EOPNOTSUPP; 889 return -EOPNOTSUPP;
784 890
891 /* pass any unhandled ceph.* xattrs through to the MDS */
892 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
893 goto do_sync_unlocked;
894
785 /* preallocate memory for xattr name, value, index node */ 895 /* preallocate memory for xattr name, value, index node */
786 err = -ENOMEM; 896 err = -ENOMEM;
787 newname = kmemdup(name, name_len + 1, GFP_NOFS); 897 newname = kmemdup(name, name_len + 1, GFP_NOFS);
@@ -838,6 +948,7 @@ retry:
838 948
839do_sync: 949do_sync:
840 spin_unlock(&ci->i_ceph_lock); 950 spin_unlock(&ci->i_ceph_lock);
951do_sync_unlocked:
841 err = ceph_sync_setxattr(dentry, name, value, size, flags); 952 err = ceph_sync_setxattr(dentry, name, value, size, flags);
842out: 953out:
843 kfree(newname); 954 kfree(newname);
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
892 if (vxattr && vxattr->readonly) 1003 if (vxattr && vxattr->readonly)
893 return -EOPNOTSUPP; 1004 return -EOPNOTSUPP;
894 1005
1006 /* pass any unhandled ceph.* xattrs through to the MDS */
1007 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1008 goto do_sync_unlocked;
1009
895 err = -ENOMEM; 1010 err = -ENOMEM;
896 spin_lock(&ci->i_ceph_lock); 1011 spin_lock(&ci->i_ceph_lock);
897retry: 1012retry:
@@ -931,6 +1046,7 @@ retry:
931 return err; 1046 return err;
932do_sync: 1047do_sync:
933 spin_unlock(&ci->i_ceph_lock); 1048 spin_unlock(&ci->i_ceph_lock);
1049do_sync_unlocked:
934 err = ceph_send_removexattr(dentry, name); 1050 err = ceph_send_removexattr(dentry, name);
935out: 1051out:
936 return err; 1052 return err;
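Both ceph_setxattr() and ceph_removexattr() now short-circuit to the do_sync_unlocked label for any ceph.*-prefixed name they do not handle themselves, so unknown ceph attributes are forwarded to the MDS instead of being stored as ordinary xattrs. The prefix test is a plain bounded compare:

#include <stdio.h>
#include <string.h>

#define XATTR_CEPH_PREFIX	"ceph."
#define XATTR_CEPH_PREFIX_LEN	(sizeof(XATTR_CEPH_PREFIX) - 1)

/* Same check as in the setxattr/removexattr hunks: does the name start
 * with the "ceph." namespace prefix? */
static int is_ceph_name(const char *name)
{
	return strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) == 0;
}

int main(void)
{
	printf("%d %d\n", is_ceph_name("ceph.dir.layout"),
	       is_ceph_name("user.comment"));	/* prints: 1 0 */
	return 0;
}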
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 21ff76c22a17..2906ee276408 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -155,14 +155,14 @@ config CIFS_DFS_UPCALL
155 points. If unsure, say N. 155 points. If unsure, say N.
156 156
157config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system"
159 depends on CIFS && EXPERIMENTAL && BROKEN 159 depends on CIFS && BROKEN
160 help 160 help
161 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
162 162
163config CIFS_SMB2 163config CIFS_SMB2
164 bool "SMB2 network file system support (EXPERIMENTAL)" 164 bool "SMB2 network file system support"
165 depends on CIFS && EXPERIMENTAL && INET 165 depends on CIFS && INET
166 select NLS 166 select NLS
167 select KEYS 167 select KEYS
168 select FSCACHE 168 select FSCACHE
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bfc..210fce2df308 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
226compose_mount_options_err: 226compose_mount_options_err:
227 kfree(mountdata); 227 kfree(mountdata);
228 mountdata = ERR_PTR(rc); 228 mountdata = ERR_PTR(rc);
229 kfree(*devname);
230 *devname = NULL;
229 goto compose_mount_options_out; 231 goto compose_mount_options_out;
230} 232}
231 233
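The cifs_dfs_ref.c fix frees *devname on the error path and clears the caller's pointer so nothing downstream can use or double-free it. The same error-path discipline in a standalone form; build_options() and its allocations are purely illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int build_options(char **devname, char **mountdata)
{
	*devname = strdup("//server/share");
	*mountdata = malloc(128);
	if (!*devname || !*mountdata) {
		free(*mountdata);
		*mountdata = NULL;
		free(*devname);
		*devname = NULL;	/* leave no dangling pointer behind */
		return -1;
	}
	snprintf(*mountdata, 128, "unc=%s", *devname);
	return 0;
}

int main(void)
{
	char *devname, *mountdata;

	if (build_options(&devname, &mountdata) == 0) {
		printf("%s\n", mountdata);
		free(mountdata);
		free(devname);
	}
	return 0;
}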
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index c865bfdfe819..37e4a72a7d1c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -55,10 +55,10 @@ struct cifs_sb_info {
55 unsigned int wsize; 55 unsigned int wsize;
56 unsigned long actimeo; /* attribute cache timeout (jiffies) */ 56 unsigned long actimeo; /* attribute cache timeout (jiffies) */
57 atomic_t active; 57 atomic_t active;
58 uid_t mnt_uid; 58 kuid_t mnt_uid;
59 gid_t mnt_gid; 59 kgid_t mnt_gid;
60 uid_t mnt_backupuid; 60 kuid_t mnt_backupuid;
61 gid_t mnt_backupgid; 61 kgid_t mnt_backupgid;
62 umode_t mnt_file_mode; 62 umode_t mnt_file_mode;
63 umode_t mnt_dir_mode; 63 umode_t mnt_dir_mode;
64 unsigned int mnt_cifs_flags; 64 unsigned int mnt_cifs_flags;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 086f381d6489..10e774761299 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -149,10 +149,12 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
149 goto out; 149 goto out;
150 150
151 dp = description + strlen(description); 151 dp = description + strlen(description);
152 sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); 152 sprintf(dp, ";uid=0x%x",
153 from_kuid_munged(&init_user_ns, sesInfo->linux_uid));
153 154
154 dp = description + strlen(description); 155 dp = description + strlen(description);
155 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 156 sprintf(dp, ";creduid=0x%x",
157 from_kuid_munged(&init_user_ns, sesInfo->cred_uid));
156 158
157 if (sesInfo->user_name) { 159 if (sesInfo->user_name) {
158 dp = description + strlen(description); 160 dp = description + strlen(description);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 5cbd00e74067..f1e3f25fe004 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -266,8 +266,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
266 struct key *sidkey; 266 struct key *sidkey;
267 char *sidstr; 267 char *sidstr;
268 const struct cred *saved_cred; 268 const struct cred *saved_cred;
269 uid_t fuid = cifs_sb->mnt_uid; 269 kuid_t fuid = cifs_sb->mnt_uid;
270 gid_t fgid = cifs_sb->mnt_gid; 270 kgid_t fgid = cifs_sb->mnt_gid;
271 271
272 /* 272 /*
273 * If we have too many subauthorities, then something is really wrong. 273 * If we have too many subauthorities, then something is really wrong.
@@ -297,6 +297,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
297 * probably a safe assumption but might be better to check based on 297 * probably a safe assumption but might be better to check based on
298 * sidtype. 298 * sidtype.
299 */ 299 */
300 BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
300 if (sidkey->datalen != sizeof(uid_t)) { 301 if (sidkey->datalen != sizeof(uid_t)) {
301 rc = -EIO; 302 rc = -EIO;
302 cFYI(1, "%s: Downcall contained malformed key " 303 cFYI(1, "%s: Downcall contained malformed key "
@@ -305,10 +306,21 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
305 goto out_key_put; 306 goto out_key_put;
306 } 307 }
307 308
308 if (sidtype == SIDOWNER) 309 if (sidtype == SIDOWNER) {
309 memcpy(&fuid, &sidkey->payload.value, sizeof(uid_t)); 310 kuid_t uid;
310 else 311 uid_t id;
311 memcpy(&fgid, &sidkey->payload.value, sizeof(gid_t)); 312 memcpy(&id, &sidkey->payload.value, sizeof(uid_t));
313 uid = make_kuid(&init_user_ns, id);
314 if (uid_valid(uid))
315 fuid = uid;
316 } else {
317 kgid_t gid;
318 gid_t id;
319 memcpy(&id, &sidkey->payload.value, sizeof(gid_t));
320 gid = make_kgid(&init_user_ns, id);
321 if (gid_valid(gid))
322 fgid = gid;
323 }
312 324
313out_key_put: 325out_key_put:
314 key_put(sidkey); 326 key_put(sidkey);
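sid_to_id() now converts the raw id from the keyring upcall into a kuid_t/kgid_t with make_kuid()/make_kgid() and only overrides the mount defaults when the mapping is valid in the initial user namespace. The kernel types cannot be used from userspace, so the snippet below is a deliberately toy model of that convert-validate-override pattern; toy_kuid_t, toy_make_kuid() and the 65536 cutoff are invented purely for illustration.

#include <stdio.h>
#include <stdbool.h>

typedef struct { long val; } toy_kuid_t;	/* stand-in for kuid_t */

static toy_kuid_t toy_make_kuid(unsigned long id)
{
	/* Pretend this namespace only maps ids below 65536. */
	if (id >= 65536)
		return (toy_kuid_t){ -1 };	/* plays the role of INVALID_UID */
	return (toy_kuid_t){ (long)id };
}

static bool toy_uid_valid(toy_kuid_t uid)
{
	return uid.val >= 0;
}

int main(void)
{
	toy_kuid_t fuid = toy_make_kuid(99);		/* mount default */
	toy_kuid_t mapped = toy_make_kuid(70000);	/* bogus upcall value */

	if (toy_uid_valid(mapped))
		fuid = mapped;		/* override only when the mapping holds */
	printf("owner uid: %ld\n", fuid.val);
	return 0;
}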
@@ -346,7 +358,8 @@ init_cifs_idmap(void)
346 if (!cred) 358 if (!cred)
347 return -ENOMEM; 359 return -ENOMEM;
348 360
349 keyring = keyring_alloc(".cifs_idmap", 0, 0, cred, 361 keyring = keyring_alloc(".cifs_idmap",
362 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
350 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 363 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
351 KEY_USR_VIEW | KEY_USR_READ, 364 KEY_USR_VIEW | KEY_USR_READ,
352 KEY_ALLOC_NOT_IN_QUOTA, NULL); 365 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -774,7 +787,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
774 787
775/* Convert permission bits from mode to equivalent CIFS ACL */ 788/* Convert permission bits from mode to equivalent CIFS ACL */
776static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, 789static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
777 __u32 secdesclen, __u64 nmode, uid_t uid, gid_t gid, int *aclflag) 790 __u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid, int *aclflag)
778{ 791{
779 int rc = 0; 792 int rc = 0;
780 __u32 dacloffset; 793 __u32 dacloffset;
@@ -806,17 +819,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
806 *aclflag = CIFS_ACL_DACL; 819 *aclflag = CIFS_ACL_DACL;
807 } else { 820 } else {
808 memcpy(pnntsd, pntsd, secdesclen); 821 memcpy(pnntsd, pntsd, secdesclen);
809 if (uid != NO_CHANGE_32) { /* chown */ 822 if (uid_valid(uid)) { /* chown */
823 uid_t id;
810 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 824 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
811 le32_to_cpu(pnntsd->osidoffset)); 825 le32_to_cpu(pnntsd->osidoffset));
812 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), 826 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
813 GFP_KERNEL); 827 GFP_KERNEL);
814 if (!nowner_sid_ptr) 828 if (!nowner_sid_ptr)
815 return -ENOMEM; 829 return -ENOMEM;
816 rc = id_to_sid(uid, SIDOWNER, nowner_sid_ptr); 830 id = from_kuid(&init_user_ns, uid);
831 rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
817 if (rc) { 832 if (rc) {
818 cFYI(1, "%s: Mapping error %d for owner id %d", 833 cFYI(1, "%s: Mapping error %d for owner id %d",
819 __func__, rc, uid); 834 __func__, rc, id);
820 kfree(nowner_sid_ptr); 835 kfree(nowner_sid_ptr);
821 return rc; 836 return rc;
822 } 837 }
@@ -824,17 +839,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
824 kfree(nowner_sid_ptr); 839 kfree(nowner_sid_ptr);
825 *aclflag = CIFS_ACL_OWNER; 840 *aclflag = CIFS_ACL_OWNER;
826 } 841 }
827 if (gid != NO_CHANGE_32) { /* chgrp */ 842 if (gid_valid(gid)) { /* chgrp */
843 gid_t id;
828 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 844 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
829 le32_to_cpu(pnntsd->gsidoffset)); 845 le32_to_cpu(pnntsd->gsidoffset));
830 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), 846 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
831 GFP_KERNEL); 847 GFP_KERNEL);
832 if (!ngroup_sid_ptr) 848 if (!ngroup_sid_ptr)
833 return -ENOMEM; 849 return -ENOMEM;
834 rc = id_to_sid(gid, SIDGROUP, ngroup_sid_ptr); 850 id = from_kgid(&init_user_ns, gid);
851 rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
835 if (rc) { 852 if (rc) {
836 cFYI(1, "%s: Mapping error %d for group id %d", 853 cFYI(1, "%s: Mapping error %d for group id %d",
837 __func__, rc, gid); 854 __func__, rc, id);
838 kfree(ngroup_sid_ptr); 855 kfree(ngroup_sid_ptr);
839 return rc; 856 return rc;
840 } 857 }
@@ -1002,7 +1019,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
1002/* Convert mode bits to an ACL so we can update the ACL on the server */ 1019/* Convert mode bits to an ACL so we can update the ACL on the server */
1003int 1020int
1004id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, 1021id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
1005 uid_t uid, gid_t gid) 1022 kuid_t uid, kgid_t gid)
1006{ 1023{
1007 int rc = 0; 1024 int rc = 0;
1008 int aclflag = CIFS_ACL_DACL; /* default flag to set */ 1025 int aclflag = CIFS_ACL_DACL; /* default flag to set */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f653835d067b..4bad7b16271f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -228,7 +228,6 @@ cifs_alloc_inode(struct super_block *sb)
228 cifs_set_oplock_level(cifs_inode, 0); 228 cifs_set_oplock_level(cifs_inode, 0);
229 cifs_inode->delete_pending = false; 229 cifs_inode->delete_pending = false;
230 cifs_inode->invalid_mapping = false; 230 cifs_inode->invalid_mapping = false;
231 cifs_inode->leave_pages_clean = false;
232 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 231 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
233 cifs_inode->server_eof = 0; 232 cifs_inode->server_eof = 0;
234 cifs_inode->uniqueid = 0; 233 cifs_inode->uniqueid = 0;
@@ -376,13 +375,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
376 (int)(srcaddr->sa_family)); 375 (int)(srcaddr->sa_family));
377 } 376 }
378 377
379 seq_printf(s, ",uid=%u", cifs_sb->mnt_uid); 378 seq_printf(s, ",uid=%u",
379 from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
381 seq_printf(s, ",forceuid"); 381 seq_printf(s, ",forceuid");
382 else 382 else
383 seq_printf(s, ",noforceuid"); 383 seq_printf(s, ",noforceuid");
384 384
385 seq_printf(s, ",gid=%u", cifs_sb->mnt_gid); 385 seq_printf(s, ",gid=%u",
386 from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
386 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 387 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
387 seq_printf(s, ",forcegid"); 388 seq_printf(s, ",forcegid");
388 else 389 else
@@ -437,9 +438,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
437 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 438 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
438 seq_printf(s, ",noperm"); 439 seq_printf(s, ",noperm");
439 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) 440 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
440 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid); 441 seq_printf(s, ",backupuid=%u",
442 from_kuid_munged(&init_user_ns,
443 cifs_sb->mnt_backupuid));
441 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) 444 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
442 seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid); 445 seq_printf(s, ",backupgid=%u",
446 from_kgid_munged(&init_user_ns,
447 cifs_sb->mnt_backupgid));
443 448
444 seq_printf(s, ",rsize=%u", cifs_sb->rsize); 449 seq_printf(s, ",rsize=%u", cifs_sb->rsize);
445 seq_printf(s, ",wsize=%u", cifs_sb->wsize); 450 seq_printf(s, ",wsize=%u", cifs_sb->wsize);
@@ -678,7 +683,7 @@ out_nls:
678static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 683static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
679 unsigned long nr_segs, loff_t pos) 684 unsigned long nr_segs, loff_t pos)
680{ 685{
681 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 686 struct inode *inode = file_inode(iocb->ki_filp);
682 ssize_t written; 687 ssize_t written;
683 int rc; 688 int rc;
684 689
@@ -702,7 +707,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
702 */ 707 */
703 if (whence != SEEK_SET && whence != SEEK_CUR) { 708 if (whence != SEEK_SET && whence != SEEK_CUR) {
704 int rc; 709 int rc;
705 struct inode *inode = file->f_path.dentry->d_inode; 710 struct inode *inode = file_inode(file);
706 711
707 /* 712 /*
708 * We need to be sure that all dirty pages are written and the 713 * We need to be sure that all dirty pages are written and the
@@ -734,7 +739,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
734{ 739{
735 /* note that this is called by vfs setlease with lock_flocks held 740 /* note that this is called by vfs setlease with lock_flocks held
736 to protect *lease from going away */ 741 to protect *lease from going away */
737 struct inode *inode = file->f_path.dentry->d_inode; 742 struct inode *inode = file_inode(file);
738 struct cifsFileInfo *cfile = file->private_data; 743 struct cifsFileInfo *cfile = file->private_data;
739 744
740 if (!(S_ISREG(inode->i_mode))) 745 if (!(S_ISREG(inode->i_mode)))
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index aea1eec64911..4f07f6fbe494 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -386,6 +386,7 @@ struct smb_version_values {
386 unsigned int cap_unix; 386 unsigned int cap_unix;
387 unsigned int cap_nt_find; 387 unsigned int cap_nt_find;
388 unsigned int cap_large_files; 388 unsigned int cap_large_files;
389 unsigned int oplock_read;
389}; 390};
390 391
391#define HEADER_SIZE(server) (server->vals->header_size) 392#define HEADER_SIZE(server) (server->vals->header_size)
@@ -399,11 +400,11 @@ struct smb_vol {
399 char *iocharset; /* local code page for mapping to and from Unicode */ 400 char *iocharset; /* local code page for mapping to and from Unicode */
400 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ 401 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
401 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ 402 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
402 uid_t cred_uid; 403 kuid_t cred_uid;
403 uid_t linux_uid; 404 kuid_t linux_uid;
404 gid_t linux_gid; 405 kgid_t linux_gid;
405 uid_t backupuid; 406 kuid_t backupuid;
406 gid_t backupgid; 407 kgid_t backupgid;
407 umode_t file_mode; 408 umode_t file_mode;
408 umode_t dir_mode; 409 umode_t dir_mode;
409 unsigned secFlg; 410 unsigned secFlg;
@@ -702,8 +703,8 @@ struct cifs_ses {
702 char *serverNOS; /* name of network operating system of server */ 703 char *serverNOS; /* name of network operating system of server */
703 char *serverDomain; /* security realm of server */ 704 char *serverDomain; /* security realm of server */
704 __u64 Suid; /* remote smb uid */ 705 __u64 Suid; /* remote smb uid */
705 uid_t linux_uid; /* overriding owner of files on the mount */ 706 kuid_t linux_uid; /* overriding owner of files on the mount */
706 uid_t cred_uid; /* owner of credentials */ 707 kuid_t cred_uid; /* owner of credentials */
707 unsigned int capabilities; 708 unsigned int capabilities;
708 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 709 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
709 TCP names - will ipv6 and sctp addresses fit? */ 710 TCP names - will ipv6 and sctp addresses fit? */
@@ -837,7 +838,7 @@ struct cifs_tcon {
837 */ 838 */
838struct tcon_link { 839struct tcon_link {
839 struct rb_node tl_rbnode; 840 struct rb_node tl_rbnode;
840 uid_t tl_uid; 841 kuid_t tl_uid;
841 unsigned long tl_flags; 842 unsigned long tl_flags;
842#define TCON_LINK_MASTER 0 843#define TCON_LINK_MASTER 0
843#define TCON_LINK_PENDING 1 844#define TCON_LINK_PENDING 1
@@ -930,7 +931,7 @@ struct cifsFileInfo {
930 struct list_head tlist; /* pointer to next fid owned by tcon */ 931 struct list_head tlist; /* pointer to next fid owned by tcon */
931 struct list_head flist; /* next fid (file instance) for this inode */ 932 struct list_head flist; /* next fid (file instance) for this inode */
932 struct cifs_fid_locks *llist; /* brlocks held by this fid */ 933 struct cifs_fid_locks *llist; /* brlocks held by this fid */
933 unsigned int uid; /* allows finding which FileInfo structure */ 934 kuid_t uid; /* allows finding which FileInfo structure */
934 __u32 pid; /* process id who opened file */ 935 __u32 pid; /* process id who opened file */
935 struct cifs_fid fid; /* file id from remote */ 936 struct cifs_fid fid; /* file id from remote */
936 /* BB add lock scope info here if needed */ ; 937 /* BB add lock scope info here if needed */ ;
@@ -1030,7 +1031,6 @@ struct cifsInodeInfo {
1030 bool clientCanCacheAll; /* read and writebehind oplock */ 1031 bool clientCanCacheAll; /* read and writebehind oplock */
1031 bool delete_pending; /* DELETE_ON_CLOSE is set */ 1032 bool delete_pending; /* DELETE_ON_CLOSE is set */
1032 bool invalid_mapping; /* pagecache is invalid */ 1033 bool invalid_mapping; /* pagecache is invalid */
1033 bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */
1034 unsigned long time; /* jiffies of last update of inode */ 1034 unsigned long time; /* jiffies of last update of inode */
1035 u64 server_eof; /* current file size on server -- protected by i_lock */ 1035 u64 server_eof; /* current file size on server -- protected by i_lock */
1036 u64 uniqueid; /* server inode number */ 1036 u64 uniqueid; /* server inode number */
@@ -1245,8 +1245,8 @@ struct cifs_fattr {
1245 u64 cf_eof; 1245 u64 cf_eof;
1246 u64 cf_bytes; 1246 u64 cf_bytes;
1247 u64 cf_createtime; 1247 u64 cf_createtime;
1248 uid_t cf_uid; 1248 kuid_t cf_uid;
1249 gid_t cf_gid; 1249 kgid_t cf_gid;
1250 umode_t cf_mode; 1250 umode_t cf_mode;
1251 dev_t cf_rdev; 1251 dev_t cf_rdev;
1252 unsigned int cf_nlink; 1252 unsigned int cf_nlink;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b9d59a948a2c..e996ff6b26d1 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -277,7 +277,6 @@
277#define CIFS_NO_HANDLE 0xFFFF 277#define CIFS_NO_HANDLE 0xFFFF
278 278
279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL 279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
280#define NO_CHANGE_32 0xFFFFFFFFUL
281 280
282/* IPC$ in ASCII */ 281/* IPC$ in ASCII */
283#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24" 282#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24"
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1988c1baa224..f450f0683ddd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -46,7 +46,8 @@ extern void _free_xid(unsigned int);
46({ \ 46({ \
47 unsigned int __xid = _get_xid(); \ 47 unsigned int __xid = _get_xid(); \
48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \ 48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \
49 __func__, __xid, current_fsuid()); \ 49 __func__, __xid, \
50 from_kuid(&init_user_ns, current_fsuid())); \
50 __xid; \ 51 __xid; \
51}) 52})
52 53
@@ -161,7 +162,7 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
161 struct cifs_fattr *fattr, struct inode *inode, 162 struct cifs_fattr *fattr, struct inode *inode,
162 const char *path, const __u16 *pfid); 163 const char *path, const __u16 *pfid);
163extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, 164extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64,
164 uid_t, gid_t); 165 kuid_t, kgid_t);
165extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 166extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
166 const char *, u32 *); 167 const char *, u32 *);
167extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, 168extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
@@ -304,8 +305,8 @@ struct cifs_unix_set_info_args {
304 __u64 atime; 305 __u64 atime;
305 __u64 mtime; 306 __u64 mtime;
306 __u64 mode; 307 __u64 mode;
307 __u64 uid; 308 kuid_t uid;
308 __u64 gid; 309 kgid_t gid;
309 dev_t device; 310 dev_t device;
310}; 311};
311 312
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76d0d2998850..00e12f2d626b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5819,8 +5819,14 @@ static void
5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, 5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5820 const struct cifs_unix_set_info_args *args) 5820 const struct cifs_unix_set_info_args *args)
5821{ 5821{
5822 u64 uid = NO_CHANGE_64, gid = NO_CHANGE_64;
5822 u64 mode = args->mode; 5823 u64 mode = args->mode;
5823 5824
5825 if (uid_valid(args->uid))
5826 uid = from_kuid(&init_user_ns, args->uid);
5827 if (gid_valid(args->gid))
5828 gid = from_kgid(&init_user_ns, args->gid);
5829
5824 /* 5830 /*
5825 * Samba server ignores set of file size to zero due to bugs in some 5831 * Samba server ignores set of file size to zero due to bugs in some
5826 * older clients, but we should be precise - we use SetFileSize to 5832 * older clients, but we should be precise - we use SetFileSize to
@@ -5833,8 +5839,8 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5833 data_offset->LastStatusChange = cpu_to_le64(args->ctime); 5839 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5834 data_offset->LastAccessTime = cpu_to_le64(args->atime); 5840 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5835 data_offset->LastModificationTime = cpu_to_le64(args->mtime); 5841 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5836 data_offset->Uid = cpu_to_le64(args->uid); 5842 data_offset->Uid = cpu_to_le64(uid);
5837 data_offset->Gid = cpu_to_le64(args->gid); 5843 data_offset->Gid = cpu_to_le64(gid);
5838 /* better to leave device as zero when it is */ 5844 /* better to leave device as zero when it is */
5839 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); 5845 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5840 data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); 5846 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
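cifs_fill_unix_set_info() does the reverse translation at the protocol boundary: an invalid kuid/kgid now means "leave the owner alone" and is encoded as the all-ones NO_CHANGE_64 sentinel, while a valid id is mapped back to a plain number with from_kuid()/from_kgid(). A small sketch of that encoding step, with a plain long and a bool standing in for kuid_t and uid_valid():

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL

static uint64_t encode_owner(long uid, bool valid)
{
	/* Valid id -> send it; invalid id -> tell the server "no change". */
	return valid ? (uint64_t)uid : NO_CHANGE_64;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)encode_owner(1000, true));
	printf("%#llx\n", (unsigned long long)encode_owner(0, false));
	return 0;
}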
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17c3643e5950..4474a57f30ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -987,6 +987,41 @@ static int get_option_ul(substring_t args[], unsigned long *option)
987 return rc; 987 return rc;
988} 988}
989 989
990static int get_option_uid(substring_t args[], kuid_t *result)
991{
992 unsigned long value;
993 kuid_t uid;
994 int rc;
995
996 rc = get_option_ul(args, &value);
997 if (rc)
998 return rc;
999
1000 uid = make_kuid(current_user_ns(), value);
1001 if (!uid_valid(uid))
1002 return -EINVAL;
1003
1004 *result = uid;
1005 return 0;
1006}
1007
1008static int get_option_gid(substring_t args[], kgid_t *result)
1009{
1010 unsigned long value;
1011 kgid_t gid;
1012 int rc;
1013
1014 rc = get_option_ul(args, &value);
1015 if (rc)
1016 return rc;
1017
1018 gid = make_kgid(current_user_ns(), value);
1019 if (!gid_valid(gid))
1020 return -EINVAL;
1021
1022 *result = gid;
1023 return 0;
1024}
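get_option_uid() and get_option_gid() wrap the old numeric parse with a namespace mapping and reject values that do not map, so the mount fails up front instead of storing an unusable id. A userspace sketch of the same parse-then-validate flow; valid_in_ns() is a made-up stand-in for make_kuid() plus uid_valid().

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int valid_in_ns(unsigned long id)
{
	return id <= 0xFFFFFFFEUL;	/* toy rule: anything but (uid_t)-1 */
}

static int parse_uid_option(const char *arg, unsigned long *out)
{
	char *end;
	unsigned long value;

	errno = 0;
	value = strtoul(arg, &end, 0);
	if (errno || *end != '\0')
		return -EINVAL;		/* not a number at all */
	if (!valid_in_ns(value))
		return -EINVAL;		/* a number, but unmappable */
	*out = value;
	return 0;
}

int main(void)
{
	unsigned long uid;

	if (parse_uid_option("1000", &uid) == 0)
		printf("uid=%lu\n", uid);
	if (parse_uid_option("bogus", &uid) != 0)
		printf("rejected\n");
	return 0;
}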
990 1025
991static int cifs_parse_security_flavors(char *value, 1026static int cifs_parse_security_flavors(char *value,
992 struct smb_vol *vol) 1027 struct smb_vol *vol)
@@ -1424,47 +1459,42 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1424 1459
1425 /* Numeric Values */ 1460 /* Numeric Values */
1426 case Opt_backupuid: 1461 case Opt_backupuid:
1427 if (get_option_ul(args, &option)) { 1462 if (get_option_uid(args, &vol->backupuid)) {
1428 cERROR(1, "%s: Invalid backupuid value", 1463 cERROR(1, "%s: Invalid backupuid value",
1429 __func__); 1464 __func__);
1430 goto cifs_parse_mount_err; 1465 goto cifs_parse_mount_err;
1431 } 1466 }
1432 vol->backupuid = option;
1433 vol->backupuid_specified = true; 1467 vol->backupuid_specified = true;
1434 break; 1468 break;
1435 case Opt_backupgid: 1469 case Opt_backupgid:
1436 if (get_option_ul(args, &option)) { 1470 if (get_option_gid(args, &vol->backupgid)) {
1437 cERROR(1, "%s: Invalid backupgid value", 1471 cERROR(1, "%s: Invalid backupgid value",
1438 __func__); 1472 __func__);
1439 goto cifs_parse_mount_err; 1473 goto cifs_parse_mount_err;
1440 } 1474 }
1441 vol->backupgid = option;
1442 vol->backupgid_specified = true; 1475 vol->backupgid_specified = true;
1443 break; 1476 break;
1444 case Opt_uid: 1477 case Opt_uid:
1445 if (get_option_ul(args, &option)) { 1478 if (get_option_uid(args, &vol->linux_uid)) {
1446 cERROR(1, "%s: Invalid uid value", 1479 cERROR(1, "%s: Invalid uid value",
1447 __func__); 1480 __func__);
1448 goto cifs_parse_mount_err; 1481 goto cifs_parse_mount_err;
1449 } 1482 }
1450 vol->linux_uid = option;
1451 uid_specified = true; 1483 uid_specified = true;
1452 break; 1484 break;
1453 case Opt_cruid: 1485 case Opt_cruid:
1454 if (get_option_ul(args, &option)) { 1486 if (get_option_uid(args, &vol->cred_uid)) {
1455 cERROR(1, "%s: Invalid cruid value", 1487 cERROR(1, "%s: Invalid cruid value",
1456 __func__); 1488 __func__);
1457 goto cifs_parse_mount_err; 1489 goto cifs_parse_mount_err;
1458 } 1490 }
1459 vol->cred_uid = option;
1460 break; 1491 break;
1461 case Opt_gid: 1492 case Opt_gid:
1462 if (get_option_ul(args, &option)) { 1493 if (get_option_gid(args, &vol->linux_gid)) {
1463 cERROR(1, "%s: Invalid gid value", 1494 cERROR(1, "%s: Invalid gid value",
1464 __func__); 1495 __func__);
1465 goto cifs_parse_mount_err; 1496 goto cifs_parse_mount_err;
1466 } 1497 }
1467 vol->linux_gid = option;
1468 gid_specified = true; 1498 gid_specified = true;
1469 break; 1499 break;
1470 case Opt_file_mode: 1500 case Opt_file_mode:
@@ -1917,7 +1947,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1917 } 1947 }
1918 case AF_INET6: { 1948 case AF_INET6: {
1919 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; 1949 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
1920 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; 1950 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
1921 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); 1951 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
1922 } 1952 }
1923 default: 1953 default:
@@ -2241,7 +2271,7 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
2241{ 2271{
2242 switch (ses->server->secType) { 2272 switch (ses->server->secType) {
2243 case Kerberos: 2273 case Kerberos:
2244 if (vol->cred_uid != ses->cred_uid) 2274 if (!uid_eq(vol->cred_uid, ses->cred_uid))
2245 return 0; 2275 return 0;
2246 break; 2276 break;
2247 default: 2277 default:
@@ -2713,7 +2743,7 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
2713 if (new->rsize && new->rsize < old->rsize) 2743 if (new->rsize && new->rsize < old->rsize)
2714 return 0; 2744 return 0;
2715 2745
2716 if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) 2746 if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid))
2717 return 0; 2747 return 0;
2718 2748
2719 if (old->mnt_file_mode != new->mnt_file_mode || 2749 if (old->mnt_file_mode != new->mnt_file_mode ||
@@ -3919,7 +3949,7 @@ cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
3919} 3949}
3920 3950
3921static struct cifs_tcon * 3951static struct cifs_tcon *
3922cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) 3952cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
3923{ 3953{
3924 int rc; 3954 int rc;
3925 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); 3955 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
@@ -3989,7 +4019,7 @@ cifs_sb_tcon_pending_wait(void *unused)
3989 4019
3990/* find and return a tlink with given uid */ 4020/* find and return a tlink with given uid */
3991static struct tcon_link * 4021static struct tcon_link *
3992tlink_rb_search(struct rb_root *root, uid_t uid) 4022tlink_rb_search(struct rb_root *root, kuid_t uid)
3993{ 4023{
3994 struct rb_node *node = root->rb_node; 4024 struct rb_node *node = root->rb_node;
3995 struct tcon_link *tlink; 4025 struct tcon_link *tlink;
@@ -3997,9 +4027,9 @@ tlink_rb_search(struct rb_root *root, uid_t uid)
3997 while (node) { 4027 while (node) {
3998 tlink = rb_entry(node, struct tcon_link, tl_rbnode); 4028 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3999 4029
4000 if (tlink->tl_uid > uid) 4030 if (uid_gt(tlink->tl_uid, uid))
4001 node = node->rb_left; 4031 node = node->rb_left;
4002 else if (tlink->tl_uid < uid) 4032 else if (uid_lt(tlink->tl_uid, uid))
4003 node = node->rb_right; 4033 node = node->rb_right;
4004 else 4034 else
4005 return tlink; 4035 return tlink;
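tlink_rb_search() keeps working exactly as before; only the key comparisons switch from raw < and > to uid_gt()/uid_lt() so the tree can be ordered on the opaque kuid_t type. The shape of the walk, shown here on a plain binary search tree with unsigned keys rather than the kernel rbtree API:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	unsigned int uid;
	struct node *left, *right;
};

/* Explicit comparison helpers, mirroring uid_gt()/uid_lt(). */
static bool uid_gt(unsigned int a, unsigned int b) { return a > b; }
static bool uid_lt(unsigned int a, unsigned int b) { return a < b; }

static struct node *uid_search(struct node *n, unsigned int uid)
{
	while (n) {
		if (uid_gt(n->uid, uid))
			n = n->left;		/* sought key is smaller */
		else if (uid_lt(n->uid, uid))
			n = n->right;		/* sought key is larger */
		else
			return n;
	}
	return NULL;
}

int main(void)
{
	struct node low = { 500, NULL, NULL }, high = { 2000, NULL, NULL };
	struct node root = { 1000, &low, &high };

	printf("%s\n", uid_search(&root, 2000) ? "found" : "missing");
	return 0;
}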
@@ -4018,7 +4048,7 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
4018 tlink = rb_entry(*new, struct tcon_link, tl_rbnode); 4048 tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
4019 parent = *new; 4049 parent = *new;
4020 4050
4021 if (tlink->tl_uid > new_tlink->tl_uid) 4051 if (uid_gt(tlink->tl_uid, new_tlink->tl_uid))
4022 new = &((*new)->rb_left); 4052 new = &((*new)->rb_left);
4023 else 4053 else
4024 new = &((*new)->rb_right); 4054 new = &((*new)->rb_right);
@@ -4048,7 +4078,7 @@ struct tcon_link *
4048cifs_sb_tlink(struct cifs_sb_info *cifs_sb) 4078cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
4049{ 4079{
4050 int ret; 4080 int ret;
4051 uid_t fsuid = current_fsuid(); 4081 kuid_t fsuid = current_fsuid();
4052 struct tcon_link *tlink, *newtlink; 4082 struct tcon_link *tlink, *newtlink;
4053 4083
4054 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 4084 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 8719bbe0dcc3..1cd016217448 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -342,14 +342,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
342 342
343 *created |= FILE_CREATED; 343 *created |= FILE_CREATED;
344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
345 args.uid = (__u64) current_fsuid(); 345 args.uid = current_fsuid();
346 if (inode->i_mode & S_ISGID) 346 if (inode->i_mode & S_ISGID)
347 args.gid = (__u64) inode->i_gid; 347 args.gid = inode->i_gid;
348 else 348 else
349 args.gid = (__u64) current_fsgid(); 349 args.gid = current_fsgid();
350 } else { 350 } else {
351 args.uid = NO_CHANGE_64; 351 args.uid = INVALID_UID; /* no change */
352 args.gid = NO_CHANGE_64; 352 args.gid = INVALID_GID; /* no change */
353 } 353 }
354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, 354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid,
355 current->tgid); 355 current->tgid);
@@ -588,11 +588,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
588 .device = device_number, 588 .device = device_number,
589 }; 589 };
590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
591 args.uid = (__u64) current_fsuid(); 591 args.uid = current_fsuid();
592 args.gid = (__u64) current_fsgid(); 592 args.gid = current_fsgid();
593 } else { 593 } else {
594 args.uid = NO_CHANGE_64; 594 args.uid = INVALID_UID; /* no change */
595 args.gid = NO_CHANGE_64; 595 args.gid = INVALID_GID; /* no change */
596 } 596 }
597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, 597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
598 cifs_sb->local_nls, 598 cifs_sb->local_nls,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a6677ba212b..c16d2a018ab8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -238,6 +238,23 @@ out:
238 return rc; 238 return rc;
239} 239}
240 240
241static bool
242cifs_has_mand_locks(struct cifsInodeInfo *cinode)
243{
244 struct cifs_fid_locks *cur;
245 bool has_locks = false;
246
247 down_read(&cinode->lock_sem);
248 list_for_each_entry(cur, &cinode->llist, llist) {
249 if (!list_empty(&cur->locks)) {
250 has_locks = true;
251 break;
252 }
253 }
254 up_read(&cinode->lock_sem);
255 return has_locks;
256}
257
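cifs_has_mand_locks() answers one question: does any open fid on this inode currently hold a byte-range lock? It walks the per-fid lock lists under lock_sem and stops at the first non-empty one. Stripped of the kernel list and rwsem machinery, the logic reduces to the sketch below, with arrays replacing the linked lists.

#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

struct fid_locks {
	size_t nlocks;	/* number of brlocks held through this fid */
};

static bool has_mand_locks(const struct fid_locks *fids, size_t nfids)
{
	for (size_t i = 0; i < nfids; i++)
		if (fids[i].nlocks != 0)
			return true;	/* first hit is enough */
	return false;
}

int main(void)
{
	const struct fid_locks fids[] = { { 0 }, { 3 }, { 0 } };

	printf("%s\n", has_mand_locks(fids, 3) ? "locked" : "clean");
	return 0;
}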
241struct cifsFileInfo * 258struct cifsFileInfo *
242cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 259cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243 struct tcon_link *tlink, __u32 oplock) 260 struct tcon_link *tlink, __u32 oplock)
@@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
248 struct cifsFileInfo *cfile; 265 struct cifsFileInfo *cfile;
249 struct cifs_fid_locks *fdlocks; 266 struct cifs_fid_locks *fdlocks;
250 struct cifs_tcon *tcon = tlink_tcon(tlink); 267 struct cifs_tcon *tcon = tlink_tcon(tlink);
268 struct TCP_Server_Info *server = tcon->ses->server;
251 269
252 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 270 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253 if (cfile == NULL) 271 if (cfile == NULL)
@@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
276 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 294 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277 mutex_init(&cfile->fh_mutex); 295 mutex_init(&cfile->fh_mutex);
278 296
297 /*
298 * If the server returned a read oplock and we have mandatory brlocks,
299 * set oplock level to None.
300 */
301 if (oplock == server->vals->oplock_read &&
302 cifs_has_mand_locks(cinode)) {
303 cFYI(1, "Reset oplock val from read to None due to mand locks");
304 oplock = 0;
305 }
306
279 spin_lock(&cifs_file_list_lock); 307 spin_lock(&cifs_file_list_lock);
280 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) 308 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
281 oplock = fid->pending_open->oplock; 309 oplock = fid->pending_open->oplock;
282 list_del(&fid->pending_open->olist); 310 list_del(&fid->pending_open->olist);
283 311
284 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); 312 server->ops->set_fid(cfile, fid, oplock);
285 313
286 list_add(&cfile->tlist, &tcon->openFileList); 314 list_add(&cfile->tlist, &tcon->openFileList);
287 /* if readable file instance put first in list*/ 315 /* if readable file instance put first in list*/
@@ -487,8 +515,8 @@ int cifs_open(struct inode *inode, struct file *file)
487 */ 515 */
488 struct cifs_unix_set_info_args args = { 516 struct cifs_unix_set_info_args args = {
489 .mode = inode->i_mode, 517 .mode = inode->i_mode,
490 .uid = NO_CHANGE_64, 518 .uid = INVALID_UID, /* no change */
491 .gid = NO_CHANGE_64, 519 .gid = INVALID_GID, /* no change */
492 .ctime = NO_CHANGE_64, 520 .ctime = NO_CHANGE_64,
493 .atime = NO_CHANGE_64, 521 .atime = NO_CHANGE_64,
494 .mtime = NO_CHANGE_64, 522 .mtime = NO_CHANGE_64,
@@ -919,7 +947,7 @@ static int
919cifs_posix_lock_test(struct file *file, struct file_lock *flock) 947cifs_posix_lock_test(struct file *file, struct file_lock *flock)
920{ 948{
921 int rc = 0; 949 int rc = 0;
922 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 950 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
923 unsigned char saved_type = flock->fl_type; 951 unsigned char saved_type = flock->fl_type;
924 952
925 if ((flock->fl_flags & FL_POSIX) == 0) 953 if ((flock->fl_flags & FL_POSIX) == 0)
@@ -946,7 +974,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
946static int 974static int
947cifs_posix_lock_set(struct file *file, struct file_lock *flock) 975cifs_posix_lock_set(struct file *file, struct file_lock *flock)
948{ 976{
949 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 977 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
950 int rc = 1; 978 int rc = 1;
951 979
952 if ((flock->fl_flags & FL_POSIX) == 0) 980 if ((flock->fl_flags & FL_POSIX) == 0)
@@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1422 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1450 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1451 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424 struct TCP_Server_Info *server = tcon->ses->server; 1452 struct TCP_Server_Info *server = tcon->ses->server;
1453 struct inode *inode = cfile->dentry->d_inode;
1425 1454
1426 if (posix_lck) { 1455 if (posix_lck) {
1427 int posix_lock_type; 1456 int posix_lock_type;
@@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1459 if (!rc) 1488 if (!rc)
1460 goto out; 1489 goto out;
1461 1490
1491 /*
1492 * Windows 7 server can delay breaking lease from read to None
1493 * if we set a byte-range lock on a file - break it explicitly
1494 * before sending the lock to the server to be sure the next
1495 * read won't conflict with non-overlapted locks due to
1496 * pagereading.
1497 */
1498 if (!CIFS_I(inode)->clientCanCacheAll &&
1499 CIFS_I(inode)->clientCanCacheRead) {
1500 cifs_invalidate_mapping(inode);
1501 cFYI(1, "Set no oplock for inode=%p due to mand locks",
1502 inode);
1503 CIFS_I(inode)->clientCanCacheRead = false;
1504 }
1505
1462 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1506 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463 type, 1, 0, wait_flag); 1507 type, 1, 0, wait_flag);
1464 if (rc) { 1508 if (rc) {
@@ -1504,7 +1548,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1504 1548
1505 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1549 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1506 netfid = cfile->fid.netfid; 1550 netfid = cfile->fid.netfid;
1507 cinode = CIFS_I(file->f_path.dentry->d_inode); 1551 cinode = CIFS_I(file_inode(file));
1508 1552
1509 if (cap_unix(tcon->ses) && 1553 if (cap_unix(tcon->ses) &&
1510 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1554 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
@@ -1649,7 +1693,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1649 are always at the end of the list but since the first entry might 1693 are always at the end of the list but since the first entry might
1650 have a close pending, we go through the whole list */ 1694 have a close pending, we go through the whole list */
1651 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1695 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1652 if (fsuid_only && open_file->uid != current_fsuid()) 1696 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1653 continue; 1697 continue;
1654 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 1698 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1655 if (!open_file->invalidHandle) { 1699 if (!open_file->invalidHandle) {
@@ -1702,7 +1746,7 @@ refind_writable:
1702 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1746 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1703 if (!any_available && open_file->pid != current->tgid) 1747 if (!any_available && open_file->pid != current->tgid)
1704 continue; 1748 continue;
1705 if (fsuid_only && open_file->uid != current_fsuid()) 1749 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1706 continue; 1750 continue;
1707 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 1751 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1708 if (!open_file->invalidHandle) { 1752 if (!open_file->invalidHandle) {
@@ -2103,15 +2147,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
2103 } else { 2147 } else {
2104 rc = copied; 2148 rc = copied;
2105 pos += copied; 2149 pos += copied;
2106 /* 2150 set_page_dirty(page);
2107 * When we use strict cache mode and cifs_strict_writev was run
2108 * with level II oplock (indicated by leave_pages_clean field of
2109 * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
2110 * sent the data to the server itself.
2111 */
2112 if (!CIFS_I(inode)->leave_pages_clean ||
2113 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
2114 set_page_dirty(page);
2115 } 2151 }
2116 2152
2117 if (rc > 0) { 2153 if (rc > 0) {
@@ -2135,7 +2171,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2135 struct cifs_tcon *tcon; 2171 struct cifs_tcon *tcon;
2136 struct TCP_Server_Info *server; 2172 struct TCP_Server_Info *server;
2137 struct cifsFileInfo *smbfile = file->private_data; 2173 struct cifsFileInfo *smbfile = file->private_data;
2138 struct inode *inode = file->f_path.dentry->d_inode; 2174 struct inode *inode = file_inode(file);
2139 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2175 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2140 2176
2141 rc = filemap_write_and_wait_range(inode->i_mapping, start, end); 2177 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2210,7 +2246,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2210 */ 2246 */
2211int cifs_flush(struct file *file, fl_owner_t id) 2247int cifs_flush(struct file *file, fl_owner_t id)
2212{ 2248{
2213 struct inode *inode = file->f_path.dentry->d_inode; 2249 struct inode *inode = file_inode(file);
2214 int rc = 0; 2250 int rc = 0;
2215 2251
2216 if (file->f_mode & FMODE_WRITE) 2252 if (file->f_mode & FMODE_WRITE)
@@ -2444,7 +2480,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2444 ssize_t written; 2480 ssize_t written;
2445 struct inode *inode; 2481 struct inode *inode;
2446 2482
2447 inode = iocb->ki_filp->f_path.dentry->d_inode; 2483 inode = file_inode(iocb->ki_filp);
2448 2484
2449 /* 2485 /*
2450 * BB - optimize the way when signing is disabled. We can drop this 2486 * BB - optimize the way when signing is disabled. We can drop this
@@ -2462,8 +2498,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2462} 2498}
2463 2499
2464static ssize_t 2500static ssize_t
2465cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, 2501cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2466 unsigned long nr_segs, loff_t pos, bool cache_ex) 2502 unsigned long nr_segs, loff_t pos)
2467{ 2503{
2468 struct file *file = iocb->ki_filp; 2504 struct file *file = iocb->ki_filp;
2469 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2505 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2485,12 +2521,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
2485 server->vals->exclusive_lock_type, NULL, 2521 server->vals->exclusive_lock_type, NULL,
2486 CIFS_WRITE_OP)) { 2522 CIFS_WRITE_OP)) {
2487 mutex_lock(&inode->i_mutex); 2523 mutex_lock(&inode->i_mutex);
2488 if (!cache_ex)
2489 cinode->leave_pages_clean = true;
2490 rc = __generic_file_aio_write(iocb, iov, nr_segs, 2524 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2491 &iocb->ki_pos); 2525 &iocb->ki_pos);
2492 if (!cache_ex)
2493 cinode->leave_pages_clean = false;
2494 mutex_unlock(&inode->i_mutex); 2526 mutex_unlock(&inode->i_mutex);
2495 } 2527 }
2496 2528
@@ -2511,66 +2543,38 @@ ssize_t
2511cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 2543cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2512 unsigned long nr_segs, loff_t pos) 2544 unsigned long nr_segs, loff_t pos)
2513{ 2545{
2514 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2546 struct inode *inode = file_inode(iocb->ki_filp);
2515 struct cifsInodeInfo *cinode = CIFS_I(inode); 2547 struct cifsInodeInfo *cinode = CIFS_I(inode);
2516 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2548 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2517 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 2549 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2518 iocb->ki_filp->private_data; 2550 iocb->ki_filp->private_data;
2519 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2551 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2520 ssize_t written, written2; 2552 ssize_t written;
2521 /*
2522 * We need to store clientCanCacheAll here to prevent race
2523 * conditions - this value can be changed during an execution
2524 * of generic_file_aio_write. For CIFS it can be changed from
2525 * true to false only, but for SMB2 it can be changed both from
2526 * true to false and vice versa. So, we can end up with a data
2527 * stored in the cache, not marked dirty and not sent to the
2528 * server if this value changes its state from false to true
2529 * after cifs_write_end.
2530 */
2531 bool cache_ex = cinode->clientCanCacheAll;
2532 bool cache_read = cinode->clientCanCacheRead;
2533 int rc;
2534 loff_t saved_pos;
2535 2553
2536 if (cache_ex) { 2554 if (cinode->clientCanCacheAll) {
2537 if (cap_unix(tcon->ses) && 2555 if (cap_unix(tcon->ses) &&
2538 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2556 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2539 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2557 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2540 tcon->fsUnixInfo.Capability)))
2541 return generic_file_aio_write(iocb, iov, nr_segs, pos); 2558 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2542 return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex); 2559 return cifs_writev(iocb, iov, nr_segs, pos);
2543 } 2560 }
2544
2545 /* 2561 /*
2546 * For files without exclusive oplock in strict cache mode we need to 2562 * For non-oplocked files in strict cache mode we need to write the data
2547 * write the data to the server exactly from the pos to pos+len-1 rather 2563 * to the server exactly from the pos to pos+len-1 rather than flush all
2548 * than flush all affected pages because it may cause a error with 2564 * affected pages because it may cause a error with mandatory locks on
2549 * mandatory locks on these pages but not on the region from pos to 2565 * these pages but not on the region from pos to ppos+len-1.
2550 * ppos+len-1.
2551 */ 2566 */
2552 written = cifs_user_writev(iocb, iov, nr_segs, pos); 2567 written = cifs_user_writev(iocb, iov, nr_segs, pos);
2553 if (!cache_read || written <= 0) 2568 if (written > 0 && cinode->clientCanCacheRead) {
2554 return written; 2569 /*
2555 2570 * Windows 7 server can delay breaking level2 oplock if a write
2556 saved_pos = iocb->ki_pos; 2571 * request comes - break it on the client to prevent reading
2557 iocb->ki_pos = pos; 2572 * an old data.
2558 /* we have a read oplock - need to store a data in the page cache */ 2573 */
2559 if (cap_unix(tcon->ses) && 2574 cifs_invalidate_mapping(inode);
2560 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2575 cFYI(1, "Set no oplock for inode=%p after a write operation",
2561 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2576 inode);
2562 tcon->fsUnixInfo.Capability))) 2577 cinode->clientCanCacheRead = false;
2563 written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
2564 else
2565 written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
2566 cache_ex);
2567 /* errors occured during writing - invalidate the page cache */
2568 if (written2 < 0) {
2569 rc = cifs_invalidate_mapping(inode);
2570 if (rc)
2571 written = (ssize_t)rc;
2572 else
2573 iocb->ki_pos = saved_pos;
2574 } 2578 }
2575 return written; 2579 return written;
2576} 2580}
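The rewritten cifs_strict_writev() drops the leave_pages_clean juggling: with an exclusive oplock the write still goes through the page cache, otherwise the data is written straight to the server and, if a level II (read) oplock was held, the mapping is invalidated and the oplock given up so later reads cannot return stale pages. A control-flow sketch with the I/O and locking stubbed out; struct state and strict_write() are invented names.

#include <stdio.h>
#include <stdbool.h>

struct state {
	bool cache_all;		/* exclusive oplock held */
	bool cache_read;	/* level II (read) oplock held */
	bool invalidated;
};

static long strict_write(struct state *st, long nbytes)
{
	if (st->cache_all)
		return nbytes;		/* cached, generic write path */

	/* Write-through path, as in cifs_user_writev(). */
	long written = nbytes;

	if (written > 0 && st->cache_read) {
		st->invalidated = true;	/* cifs_invalidate_mapping() */
		st->cache_read = false;	/* stop trusting cached reads */
	}
	return written;
}

int main(void)
{
	struct state st = { false, true, false };

	printf("wrote %ld, invalidated=%d\n",
	       strict_write(&st, 4096), st.invalidated);
	return 0;
}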
@@ -2911,7 +2915,7 @@ ssize_t
2911cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 2915cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2912 unsigned long nr_segs, loff_t pos) 2916 unsigned long nr_segs, loff_t pos)
2913{ 2917{
2914 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2918 struct inode *inode = file_inode(iocb->ki_filp);
2915 struct cifsInodeInfo *cinode = CIFS_I(inode); 2919 struct cifsInodeInfo *cinode = CIFS_I(inode);
2916 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2920 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2917 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 2921 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
@@ -3059,7 +3063,7 @@ static struct vm_operations_struct cifs_file_vm_ops = {
3059int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 3063int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3060{ 3064{
3061 int rc, xid; 3065 int rc, xid;
3062 struct inode *inode = file->f_path.dentry->d_inode; 3066 struct inode *inode = file_inode(file);
3063 3067
3064 xid = get_xid(); 3068 xid = get_xid();
3065 3069
@@ -3352,7 +3356,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3352 int rc; 3356 int rc;
3353 3357
3354 /* Is the page cached? */ 3358 /* Is the page cached? */
3355 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page); 3359 rc = cifs_readpage_from_fscache(file_inode(file), page);
3356 if (rc == 0) 3360 if (rc == 0)
3357 goto read_complete; 3361 goto read_complete;
3358 3362
@@ -3367,8 +3371,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3367 else 3371 else
3368 cFYI(1, "Bytes read %d", rc); 3372 cFYI(1, "Bytes read %d", rc);
3369 3373
3370 file->f_path.dentry->d_inode->i_atime = 3374 file_inode(file)->i_atime =
3371 current_fs_time(file->f_path.dentry->d_inode->i_sb); 3375 current_fs_time(file_inode(file)->i_sb);
3372 3376
3373 if (PAGE_CACHE_SIZE > rc) 3377 if (PAGE_CACHE_SIZE > rc)
3374 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc); 3378 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
@@ -3377,7 +3381,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3377 SetPageUptodate(page); 3381 SetPageUptodate(page);
3378 3382
3379 /* send this page to the cache */ 3383 /* send this page to the cache */
3380 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page); 3384 cifs_readpage_to_fscache(file_inode(file), page);
3381 3385
3382 rc = 0; 3386 rc = 0;
3383 3387
@@ -3577,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work)
3577 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3581 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3578 int rc = 0; 3582 int rc = 0;
3579 3583
3584 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3585 cifs_has_mand_locks(cinode)) {
3586 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3587 inode);
3588 cinode->clientCanCacheRead = false;
3589 }
3590
3580 if (inode && S_ISREG(inode->i_mode)) { 3591 if (inode && S_ISREG(inode->i_mode)) {
3581 if (cinode->clientCanCacheRead) 3592 if (cinode->clientCanCacheRead)
3582 break_lease(inode, O_RDONLY); 3593 break_lease(inode, O_RDONLY);
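
A minimal sketch of the accessor pattern the hunks above switch to, assuming the file_inode() helper from <linux/fs.h> introduced in this series; example_dump_size() is an illustrative name, not part of the patch.

#include <linux/fs.h>

/* Illustrative caller: file_inode() hides the f_path.dentry->d_inode
 * indirection and also works for files that have no dentry of their own. */
static loff_t example_dump_size(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	return i_size_read(inode);
}
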
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ed6208ff85a7..83f2606c76d0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -244,15 +244,25 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
244 break; 244 break;
245 } 245 }
246 246
247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 247 fattr->cf_uid = cifs_sb->mnt_uid;
248 fattr->cf_uid = cifs_sb->mnt_uid; 248 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
249 else 249 u64 id = le64_to_cpu(info->Uid);
250 fattr->cf_uid = le64_to_cpu(info->Uid); 250 if (id < ((uid_t)-1)) {
251 251 kuid_t uid = make_kuid(&init_user_ns, id);
252 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 252 if (uid_valid(uid))
253 fattr->cf_gid = cifs_sb->mnt_gid; 253 fattr->cf_uid = uid;
254 else 254 }
255 fattr->cf_gid = le64_to_cpu(info->Gid); 255 }
256
257 fattr->cf_gid = cifs_sb->mnt_gid;
258 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
259 u64 id = le64_to_cpu(info->Gid);
260 if (id < ((gid_t)-1)) {
261 kgid_t gid = make_kgid(&init_user_ns, id);
262 if (gid_valid(gid))
263 fattr->cf_gid = gid;
264 }
265 }
256 266
257 fattr->cf_nlink = le64_to_cpu(info->Nlinks); 267 fattr->cf_nlink = le64_to_cpu(info->Nlinks);
258} 268}
@@ -289,7 +299,7 @@ cifs_get_file_info_unix(struct file *filp)
289 unsigned int xid; 299 unsigned int xid;
290 FILE_UNIX_BASIC_INFO find_data; 300 FILE_UNIX_BASIC_INFO find_data;
291 struct cifs_fattr fattr; 301 struct cifs_fattr fattr;
292 struct inode *inode = filp->f_path.dentry->d_inode; 302 struct inode *inode = file_inode(filp);
293 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 303 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
294 struct cifsFileInfo *cfile = filp->private_data; 304 struct cifsFileInfo *cfile = filp->private_data;
295 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 305 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -558,7 +568,7 @@ cifs_get_file_info(struct file *filp)
558 unsigned int xid; 568 unsigned int xid;
559 FILE_ALL_INFO find_data; 569 FILE_ALL_INFO find_data;
560 struct cifs_fattr fattr; 570 struct cifs_fattr fattr;
561 struct inode *inode = filp->f_path.dentry->d_inode; 571 struct inode *inode = file_inode(filp);
562 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 572 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
563 struct cifsFileInfo *cfile = filp->private_data; 573 struct cifsFileInfo *cfile = filp->private_data;
564 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 574 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -806,10 +816,9 @@ static bool
806inode_has_hashed_dentries(struct inode *inode) 816inode_has_hashed_dentries(struct inode *inode)
807{ 817{
808 struct dentry *dentry; 818 struct dentry *dentry;
809 struct hlist_node *p;
810 819
811 spin_lock(&inode->i_lock); 820 spin_lock(&inode->i_lock);
812 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 821 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
813 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 822 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
814 spin_unlock(&inode->i_lock); 823 spin_unlock(&inode->i_lock);
815 return true; 824 return true;
@@ -1245,14 +1254,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
1245 .device = 0, 1254 .device = 0,
1246 }; 1255 };
1247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1256 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1248 args.uid = (__u64)current_fsuid(); 1257 args.uid = current_fsuid();
1249 if (parent->i_mode & S_ISGID) 1258 if (parent->i_mode & S_ISGID)
1250 args.gid = (__u64)parent->i_gid; 1259 args.gid = parent->i_gid;
1251 else 1260 else
1252 args.gid = (__u64)current_fsgid(); 1261 args.gid = current_fsgid();
1253 } else { 1262 } else {
1254 args.uid = NO_CHANGE_64; 1263 args.uid = INVALID_UID; /* no change */
1255 args.gid = NO_CHANGE_64; 1264 args.gid = INVALID_GID; /* no change */
1256 } 1265 }
1257 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 1266 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
1258 cifs_sb->local_nls, 1267 cifs_sb->local_nls,
@@ -1678,7 +1687,7 @@ cifs_invalidate_mapping(struct inode *inode)
1678int cifs_revalidate_file_attr(struct file *filp) 1687int cifs_revalidate_file_attr(struct file *filp)
1679{ 1688{
1680 int rc = 0; 1689 int rc = 0;
1681 struct inode *inode = filp->f_path.dentry->d_inode; 1690 struct inode *inode = file_inode(filp);
1682 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1691 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1683 1692
1684 if (!cifs_inode_needs_reval(inode)) 1693 if (!cifs_inode_needs_reval(inode))
@@ -1735,7 +1744,7 @@ out:
1735int cifs_revalidate_file(struct file *filp) 1744int cifs_revalidate_file(struct file *filp)
1736{ 1745{
1737 int rc; 1746 int rc;
1738 struct inode *inode = filp->f_path.dentry->d_inode; 1747 struct inode *inode = file_inode(filp);
1739 1748
1740 rc = cifs_revalidate_file_attr(filp); 1749 rc = cifs_revalidate_file_attr(filp);
1741 if (rc) 1750 if (rc)
@@ -2013,12 +2022,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2013 if (attrs->ia_valid & ATTR_UID) 2022 if (attrs->ia_valid & ATTR_UID)
2014 args->uid = attrs->ia_uid; 2023 args->uid = attrs->ia_uid;
2015 else 2024 else
2016 args->uid = NO_CHANGE_64; 2025 args->uid = INVALID_UID; /* no change */
2017 2026
2018 if (attrs->ia_valid & ATTR_GID) 2027 if (attrs->ia_valid & ATTR_GID)
2019 args->gid = attrs->ia_gid; 2028 args->gid = attrs->ia_gid;
2020 else 2029 else
2021 args->gid = NO_CHANGE_64; 2030 args->gid = INVALID_GID; /* no change */
2022 2031
2023 if (attrs->ia_valid & ATTR_ATIME) 2032 if (attrs->ia_valid & ATTR_ATIME)
2024 args->atime = cifs_UnixTimeToNT(attrs->ia_atime); 2033 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
@@ -2086,8 +2095,8 @@ static int
2086cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) 2095cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087{ 2096{
2088 unsigned int xid; 2097 unsigned int xid;
2089 uid_t uid = NO_CHANGE_32; 2098 kuid_t uid = INVALID_UID;
2090 gid_t gid = NO_CHANGE_32; 2099 kgid_t gid = INVALID_GID;
2091 struct inode *inode = direntry->d_inode; 2100 struct inode *inode = direntry->d_inode;
2092 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2101 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2093 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 2102 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
@@ -2146,7 +2155,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2146 2155
2147#ifdef CONFIG_CIFS_ACL 2156#ifdef CONFIG_CIFS_ACL
2148 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2157 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2149 if (uid != NO_CHANGE_32 || gid != NO_CHANGE_32) { 2158 if (uid_valid(uid) || gid_valid(gid)) {
2150 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, 2159 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
2151 uid, gid); 2160 uid, gid);
2152 if (rc) { 2161 if (rc) {
@@ -2170,7 +2179,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2170#ifdef CONFIG_CIFS_ACL 2179#ifdef CONFIG_CIFS_ACL
2171 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2180 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2172 rc = id_mode_to_cifs_acl(inode, full_path, mode, 2181 rc = id_mode_to_cifs_acl(inode, full_path, mode,
2173 NO_CHANGE_32, NO_CHANGE_32); 2182 INVALID_UID, INVALID_GID);
2174 if (rc) { 2183 if (rc) {
2175 cFYI(1, "%s: Setting ACL failed with error: %d", 2184 cFYI(1, "%s: Setting ACL failed with error: %d",
2176 __func__, rc); 2185 __func__, rc);
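
A minimal sketch of the id-mapping fallback that cifs_unix_basic_to_fattr() now applies, assuming the make_kuid()/uid_valid() helpers from <linux/uidgid.h>; example_pick_uid(), default_uid and wire_uid are illustrative names.

#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Start from the mount-wide default owner and only override it when the
 * server-supplied numeric id maps to a valid kuid_t in init_user_ns. */
static kuid_t example_pick_uid(kuid_t default_uid, u64 wire_uid)
{
	kuid_t uid = default_uid;

	if (wire_uid < ((uid_t)-1)) {
		kuid_t mapped = make_kuid(&init_user_ns, (uid_t)wire_uid);

		if (uid_valid(mapped))
			uid = mapped;
	}
	return uid;
}
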
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index fd5009d56f9f..6c9f1214cf0b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
30 30
31long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) 31long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
32{ 32{
33 struct inode *inode = filep->f_dentry->d_inode; 33 struct inode *inode = file_inode(filep);
34 int rc = -ENOTTY; /* strange error - but the precedent */ 34 int rc = -ENOTTY; /* strange error - but the precedent */
35 unsigned int xid; 35 unsigned int xid;
36 struct cifs_sb_info *cifs_sb; 36 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
76 } 76 }
77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); 77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 if (rc) { 78 if (rc) {
79 cERROR(1, "%s: Could not update iwth link_str", __func__); 79 cERROR(1, "%s: Could not update with link_str", __func__);
80 goto symlink_hash_err; 80 goto symlink_hash_err;
81 } 81 }
82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash); 82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3a00c0d0cead..1b15bf839f37 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,7 +569,7 @@ bool
569backup_cred(struct cifs_sb_info *cifs_sb) 569backup_cred(struct cifs_sb_info *cifs_sb)
570{ 570{
571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { 571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
572 if (cifs_sb->mnt_backupuid == current_fsuid()) 572 if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid()))
573 return true; 573 return true;
574 } 574 }
575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { 575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
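
A minimal sketch of the kuid_t comparison style used in the backup_cred() hunk above, assuming uid_eq() from <linux/uidgid.h> and current_fsuid() from <linux/cred.h>; example_is_backup_user() and backup_uid are illustrative names.

#include <linux/types.h>
#include <linux/cred.h>
#include <linux/uidgid.h>

/* kuid_t becomes an opaque type with the user-namespace conversion, so it
 * is compared with uid_eq() instead of '=='. */
static bool example_is_backup_user(kuid_t backup_uid)
{
	return uid_eq(backup_uid, current_fsuid());
}
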
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index cdd6ff48246b..df40cc5fd13a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -82,12 +82,10 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
82 82
83 cFYI(1, "%s: for %s", __func__, name->name); 83 cFYI(1, "%s: for %s", __func__, name->name);
84 84
85 if (parent->d_op && parent->d_op->d_hash) 85 dentry = d_hash_and_lookup(parent, name);
86 parent->d_op->d_hash(parent, parent->d_inode, name); 86 if (unlikely(IS_ERR(dentry)))
87 else 87 return;
88 name->hash = full_name_hash(name->name, name->len);
89 88
90 dentry = d_lookup(parent, name);
91 if (dentry) { 89 if (dentry) {
92 int err; 90 int err;
93 91
@@ -505,7 +503,7 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode)
505 whether we can use the cached search results from the previous search */ 503 whether we can use the cached search results from the previous search */
506static int is_dir_changed(struct file *file) 504static int is_dir_changed(struct file *file)
507{ 505{
508 struct inode *inode = file->f_path.dentry->d_inode; 506 struct inode *inode = file_inode(file);
509 struct cifsInodeInfo *cifsInfo = CIFS_I(inode); 507 struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
510 508
511 if (cifsInfo->time == 0) 509 if (cifsInfo->time == 0)
@@ -778,7 +776,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
778 switch ((int) file->f_pos) { 776 switch ((int) file->f_pos) {
779 case 0: 777 case 0:
780 if (filldir(direntry, ".", 1, file->f_pos, 778 if (filldir(direntry, ".", 1, file->f_pos,
781 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) { 779 file_inode(file)->i_ino, DT_DIR) < 0) {
782 cERROR(1, "Filldir for current dir failed"); 780 cERROR(1, "Filldir for current dir failed");
783 rc = -ENOMEM; 781 rc = -ENOMEM;
784 break; 782 break;
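
A minimal sketch of the d_hash_and_lookup() calling convention that cifs_prime_dcache() switches to above, assuming the dcache API after this series; example_lookup_child() is an illustrative name.

#include <linux/dcache.h>
#include <linux/err.h>

/* d_hash_and_lookup() now hashes the name itself and may return
 * ERR_PTR(-error) from a filesystem ->d_hash() hook, so callers check
 * IS_ERR() before dereferencing; NULL still means "not in the dcache". */
static struct dentry *example_lookup_child(struct dentry *parent,
					   struct qstr *name)
{
	struct dentry *dentry = d_hash_and_lookup(parent, name);

	if (IS_ERR(dentry))
		return NULL;	/* treat a rejected name as a miss */
	return dentry;
}
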
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index a5d234c8d5d9..47bc5a87f94e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -53,6 +53,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
53 mutex_unlock(&server->srv_mutex); 53 mutex_unlock(&server->srv_mutex);
54 return rc; 54 return rc;
55 } 55 }
56
57 /*
58 * The response to this call was already factored into the sequence
59 * number when the call went out, so we must adjust it back downward
60 * after signing here.
61 */
62 --server->sequence_number;
56 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 63 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
57 mutex_unlock(&server->srv_mutex); 64 mutex_unlock(&server->srv_mutex);
58 65
@@ -952,4 +959,5 @@ struct smb_version_values smb1_values = {
952 .cap_unix = CAP_UNIX, 959 .cap_unix = CAP_UNIX,
953 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, 960 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
954 .cap_large_files = CAP_LARGE_FILES, 961 .cap_large_files = CAP_LARGE_FILES,
962 .oplock_read = OPLOCK_READ,
955}; 963};
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d79de7bc4435..c9c7aa7ed966 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -708,6 +708,7 @@ struct smb_version_values smb20_values = {
708 .cap_unix = 0, 708 .cap_unix = 0,
709 .cap_nt_find = SMB2_NT_FIND, 709 .cap_nt_find = SMB2_NT_FIND,
710 .cap_large_files = SMB2_LARGE_FILES, 710 .cap_large_files = SMB2_LARGE_FILES,
711 .oplock_read = SMB2_OPLOCK_LEVEL_II,
711}; 712};
712 713
713struct smb_version_values smb21_values = { 714struct smb_version_values smb21_values = {
@@ -725,6 +726,7 @@ struct smb_version_values smb21_values = {
725 .cap_unix = 0, 726 .cap_unix = 0,
726 .cap_nt_find = SMB2_NT_FIND, 727 .cap_nt_find = SMB2_NT_FIND,
727 .cap_large_files = SMB2_LARGE_FILES, 728 .cap_large_files = SMB2_LARGE_FILES,
729 .oplock_read = SMB2_OPLOCK_LEVEL_II,
728}; 730};
729 731
730struct smb_version_values smb30_values = { 732struct smb_version_values smb30_values = {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 76d974c952fe..1a528680ec5a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -144,9 +144,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
144 144
145 *sent = 0; 145 *sent = 0;
146 146
147 if (ssocket == NULL)
148 return -ENOTSOCK; /* BB eventually add reconnect code here */
149
150 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr; 147 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
151 smb_msg.msg_namelen = sizeof(struct sockaddr); 148 smb_msg.msg_namelen = sizeof(struct sockaddr);
152 smb_msg.msg_control = NULL; 149 smb_msg.msg_control = NULL;
@@ -291,6 +288,9 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
291 struct socket *ssocket = server->ssocket; 288 struct socket *ssocket = server->ssocket;
292 int val = 1; 289 int val = 1;
293 290
291 if (ssocket == NULL)
292 return -ENOTSOCK;
293
294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); 294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
295 dump_smb(iov[0].iov_base, iov[0].iov_len); 295 dump_smb(iov[0].iov_base, iov[0].iov_len);
296 296
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 958ae0e0ff8c..1da168c61d35 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -33,7 +33,7 @@ void coda_cache_enter(struct inode *inode, int mask)
33 33
34 spin_lock(&cii->c_lock); 34 spin_lock(&cii->c_lock);
35 cii->c_cached_epoch = atomic_read(&permission_epoch); 35 cii->c_cached_epoch = atomic_read(&permission_epoch);
36 if (cii->c_uid != current_fsuid()) { 36 if (!uid_eq(cii->c_uid, current_fsuid())) {
37 cii->c_uid = current_fsuid(); 37 cii->c_uid = current_fsuid();
38 cii->c_cached_perm = mask; 38 cii->c_cached_perm = mask;
39 } else 39 } else
@@ -65,7 +65,7 @@ int coda_cache_check(struct inode *inode, int mask)
65 65
66 spin_lock(&cii->c_lock); 66 spin_lock(&cii->c_lock);
67 hit = (mask & cii->c_cached_perm) == mask && 67 hit = (mask & cii->c_cached_perm) == mask &&
68 cii->c_uid == current_fsuid() && 68 uid_eq(cii->c_uid, current_fsuid()) &&
69 cii->c_cached_epoch == atomic_read(&permission_epoch); 69 cii->c_cached_epoch == atomic_read(&permission_epoch);
70 spin_unlock(&cii->c_lock); 70 spin_unlock(&cii->c_lock);
71 71
diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h
index b24fdfd8a3f0..c64075213218 100644
--- a/fs/coda/coda_fs_i.h
+++ b/fs/coda/coda_fs_i.h
@@ -25,7 +25,7 @@ struct coda_inode_info {
25 u_short c_flags; /* flags (see below) */ 25 u_short c_flags; /* flags (see below) */
26 unsigned int c_mapcount; /* nr of times this inode is mapped */ 26 unsigned int c_mapcount; /* nr of times this inode is mapped */
27 unsigned int c_cached_epoch; /* epoch for cached permissions */ 27 unsigned int c_cached_epoch; /* epoch for cached permissions */
28 vuid_t c_uid; /* fsuid for cached permissions */ 28 kuid_t c_uid; /* fsuid for cached permissions */
29 unsigned int c_cached_perm; /* cached access permissions */ 29 unsigned int c_cached_perm; /* cached access permissions */
30 spinlock_t c_lock; 30 spinlock_t c_lock;
31 struct inode vfs_inode; 31 struct inode vfs_inode;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 854ace712685..2849f41e72a2 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -100,9 +100,9 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
100 if (attr->va_mode != (u_short) -1) 100 if (attr->va_mode != (u_short) -1)
101 inode->i_mode = attr->va_mode | inode_type; 101 inode->i_mode = attr->va_mode | inode_type;
102 if (attr->va_uid != -1) 102 if (attr->va_uid != -1)
103 inode->i_uid = (uid_t) attr->va_uid; 103 inode->i_uid = make_kuid(&init_user_ns, (uid_t) attr->va_uid);
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = make_kgid(&init_user_ns, (gid_t) attr->va_gid);
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 set_nlink(inode, attr->va_nlink); 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
@@ -171,10 +171,10 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
171 vattr->va_mode = iattr->ia_mode; 171 vattr->va_mode = iattr->ia_mode;
172 } 172 }
173 if ( valid & ATTR_UID ) { 173 if ( valid & ATTR_UID ) {
174 vattr->va_uid = (vuid_t) iattr->ia_uid; 174 vattr->va_uid = (vuid_t) from_kuid(&init_user_ns, iattr->ia_uid);
175 } 175 }
176 if ( valid & ATTR_GID ) { 176 if ( valid & ATTR_GID ) {
177 vattr->va_gid = (vgid_t) iattr->ia_gid; 177 vattr->va_gid = (vgid_t) from_kgid(&init_user_ns, iattr->ia_gid);
178 } 178 }
179 if ( valid & ATTR_SIZE ) { 179 if ( valid & ATTR_SIZE ) {
180 vattr->va_size = iattr->ia_size; 180 vattr->va_size = iattr->ia_size;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 49fe52d25600..b7d3a05c062c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -397,7 +397,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
397 * We can't use vfs_readdir because we have to keep the file 397 * We can't use vfs_readdir because we have to keep the file
398 * position in sync between the coda_file and the host_file. 398 * position in sync between the coda_file and the host_file.
399 * and as such we need grab the inode mutex. */ 399 * and as such we need grab the inode mutex. */
400 struct inode *host_inode = host_file->f_path.dentry->d_inode; 400 struct inode *host_inode = file_inode(host_file);
401 401
402 mutex_lock(&host_inode->i_mutex); 402 mutex_lock(&host_inode->i_mutex);
403 host_file->f_pos = coda_file->f_pos; 403 host_file->f_pos = coda_file->f_pos;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 8edd404e6419..fa4c100bdc7d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -66,7 +66,7 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
66static ssize_t 66static ssize_t
67coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos) 67coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
68{ 68{
69 struct inode *host_inode, *coda_inode = coda_file->f_path.dentry->d_inode; 69 struct inode *host_inode, *coda_inode = file_inode(coda_file);
70 struct coda_file_info *cfi; 70 struct coda_file_info *cfi;
71 struct file *host_file; 71 struct file *host_file;
72 ssize_t ret; 72 ssize_t ret;
@@ -78,7 +78,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
78 if (!host_file->f_op || !host_file->f_op->write) 78 if (!host_file->f_op || !host_file->f_op->write)
79 return -EINVAL; 79 return -EINVAL;
80 80
81 host_inode = host_file->f_path.dentry->d_inode; 81 host_inode = file_inode(host_file);
82 mutex_lock(&coda_inode->i_mutex); 82 mutex_lock(&coda_inode->i_mutex);
83 83
84 ret = host_file->f_op->write(host_file, buf, count, ppos); 84 ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -106,8 +106,8 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
106 if (!host_file->f_op || !host_file->f_op->mmap) 106 if (!host_file->f_op || !host_file->f_op->mmap)
107 return -ENODEV; 107 return -ENODEV;
108 108
109 coda_inode = coda_file->f_path.dentry->d_inode; 109 coda_inode = file_inode(coda_file);
110 host_inode = host_file->f_path.dentry->d_inode; 110 host_inode = file_inode(host_file);
111 111
112 cii = ITOC(coda_inode); 112 cii = ITOC(coda_inode);
113 spin_lock(&cii->c_lock); 113 spin_lock(&cii->c_lock);
@@ -178,7 +178,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
178 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), 178 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
179 coda_flags, coda_file->f_cred->fsuid); 179 coda_flags, coda_file->f_cred->fsuid);
180 180
181 host_inode = cfi->cfi_container->f_path.dentry->d_inode; 181 host_inode = file_inode(cfi->cfi_container);
182 cii = ITOC(coda_inode); 182 cii = ITOC(coda_inode);
183 183
184 /* did we mmap this file? */ 184 /* did we mmap this file? */
@@ -202,7 +202,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync) 202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
203{ 203{
204 struct file *host_file; 204 struct file *host_file;
205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode; 205 struct inode *coda_inode = file_inode(coda_file);
206 struct coda_file_info *cfi; 206 struct coda_file_info *cfi;
207 int err; 207 int err;
208 208
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index be2aa4909487..dada9d0abede 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -20,6 +20,7 @@
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/pid_namespace.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25 26
@@ -48,7 +49,7 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
48 return NULL; 49 return NULL;
49 memset(&ei->c_fid, 0, sizeof(struct CodaFid)); 50 memset(&ei->c_fid, 0, sizeof(struct CodaFid));
50 ei->c_flags = 0; 51 ei->c_flags = 0;
51 ei->c_uid = 0; 52 ei->c_uid = GLOBAL_ROOT_UID;
52 ei->c_cached_perm = 0; 53 ei->c_cached_perm = 0;
53 spin_lock_init(&ei->c_lock); 54 spin_lock_init(&ei->c_lock);
54 return &ei->vfs_inode; 55 return &ei->vfs_inode;
@@ -129,7 +130,7 @@ static int get_device_index(struct coda_mount_data *data)
129 f = fdget(data->fd); 130 f = fdget(data->fd);
130 if (!f.file) 131 if (!f.file)
131 goto Ebadf; 132 goto Ebadf;
132 inode = f.file->f_path.dentry->d_inode; 133 inode = file_inode(f.file);
133 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { 134 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
134 fdput(f); 135 fdput(f);
135 goto Ebadf; 136 goto Ebadf;
@@ -157,6 +158,9 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
157 int error; 158 int error;
158 int idx; 159 int idx;
159 160
161 if (task_active_pid_ns(current) != &init_pid_ns)
162 return -EINVAL;
163
160 idx = get_device_index((struct coda_mount_data *) data); 164 idx = get_device_index((struct coda_mount_data *) data);
161 165
162 /* Ignore errors in data, for backward compatibility */ 166 /* Ignore errors in data, for backward compatibility */
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index ee0981f1375b..3f5de96bbb58 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -52,7 +52,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
52 struct path path; 52 struct path path;
53 int error; 53 int error;
54 struct PioctlData data; 54 struct PioctlData data;
55 struct inode *inode = filp->f_dentry->d_inode; 55 struct inode *inode = file_inode(filp);
56 struct inode *target_inode = NULL; 56 struct inode *target_inode = NULL;
57 struct coda_inode_info *cnp; 57 struct coda_inode_info *cnp;
58 58
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 761d5b31b18d..ebc2bae6c289 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -37,6 +37,7 @@
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/pid_namespace.h>
40#include <asm/io.h> 41#include <asm/io.h>
41#include <asm/poll.h> 42#include <asm/poll.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -266,6 +267,12 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
266 struct venus_comm *vcp; 267 struct venus_comm *vcp;
267 int idx, err; 268 int idx, err;
268 269
270 if (task_active_pid_ns(current) != &init_pid_ns)
271 return -EINVAL;
272
273 if (current_user_ns() != &init_user_ns)
274 return -EINVAL;
275
269 idx = iminor(inode); 276 idx = iminor(inode);
270 if (idx < 0 || idx >= MAX_CODADEVS) 277 if (idx < 0 || idx >= MAX_CODADEVS)
271 return -ENODEV; 278 return -ENODEV;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 0c68fd31fbf2..3a731976dc5e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -50,9 +50,9 @@ static void *alloc_upcall(int opcode, int size)
50 return ERR_PTR(-ENOMEM); 50 return ERR_PTR(-ENOMEM);
51 51
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = task_pid_nr_ns(current, &init_pid_ns);
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr_ns(current, &init_pid_ns);
55 inp->ih.uid = current_fsuid(); 55 inp->ih.uid = from_kuid(&init_user_ns, current_fsuid());
56 56
57 return (void*)inp; 57 return (void*)inp;
58} 58}
@@ -157,7 +157,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid,
157} 157}
158 158
159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, 159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
160 vuid_t uid) 160 kuid_t uid)
161{ 161{
162 union inputArgs *inp; 162 union inputArgs *inp;
163 union outputArgs *outp; 163 union outputArgs *outp;
@@ -166,7 +166,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 insize = SIZE(release); 166 insize = SIZE(release);
167 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
168 168
169 inp->ih.uid = uid; 169 inp->ih.uid = from_kuid(&init_user_ns, uid);
170 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
171 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
172 172
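
A minimal sketch of the conversion applied in alloc_upcall() and venus_close() above, assuming from_kuid() from <linux/uidgid.h>; example_uid_for_upcall() and the u32 wire format are illustrative.

#include <linux/types.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Translate the in-kernel kuid_t back to a plain numeric uid in
 * init_user_ns before handing it to the userspace cache manager. */
static u32 example_uid_for_upcall(kuid_t uid)
{
	return from_kuid(&init_user_ns, uid);
}
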
diff --git a/fs/compat.c b/fs/compat.c
index 015e1e1f87c6..fe40fde29111 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1278,8 +1278,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag. 1279 * O_LARGEFILE flag.
1280 */ 1280 */
1281asmlinkage long 1281COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1282compat_sys_open(const char __user *filename, int flags, umode_t mode)
1283{ 1282{
1284 return do_sys_open(AT_FDCWD, filename, flags, mode); 1283 return do_sys_open(AT_FDCWD, filename, flags, mode);
1285} 1284}
@@ -1288,8 +1287,7 @@ compat_sys_open(const char __user *filename, int flags, umode_t mode)
1288 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the 1287 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1289 * O_LARGEFILE flag. 1288 * O_LARGEFILE flag.
1290 */ 1289 */
1291asmlinkage long 1290COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1292compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1293{ 1291{
1294 return do_sys_open(dfd, filename, flags, mode); 1292 return do_sys_open(dfd, filename, flags, mode);
1295} 1293}
@@ -1739,55 +1737,13 @@ asmlinkage long compat_sys_signalfd(int ufd,
1739} 1737}
1740#endif /* CONFIG_SIGNALFD */ 1738#endif /* CONFIG_SIGNALFD */
1741 1739
1742#ifdef CONFIG_TIMERFD
1743
1744asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
1745 const struct compat_itimerspec __user *utmr,
1746 struct compat_itimerspec __user *otmr)
1747{
1748 int error;
1749 struct itimerspec t;
1750 struct itimerspec __user *ut;
1751
1752 if (get_compat_itimerspec(&t, utmr))
1753 return -EFAULT;
1754 ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
1755 if (copy_to_user(&ut[0], &t, sizeof(t)))
1756 return -EFAULT;
1757 error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
1758 if (!error && otmr)
1759 error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
1760 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1761
1762 return error;
1763}
1764
1765asmlinkage long compat_sys_timerfd_gettime(int ufd,
1766 struct compat_itimerspec __user *otmr)
1767{
1768 int error;
1769 struct itimerspec t;
1770 struct itimerspec __user *ut;
1771
1772 ut = compat_alloc_user_space(sizeof(struct itimerspec));
1773 error = sys_timerfd_gettime(ufd, ut);
1774 if (!error)
1775 error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
1776 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1777
1778 return error;
1779}
1780
1781#endif /* CONFIG_TIMERFD */
1782
1783#ifdef CONFIG_FHANDLE 1740#ifdef CONFIG_FHANDLE
1784/* 1741/*
1785 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 1742 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
1786 * doesn't set the O_LARGEFILE flag. 1743 * doesn't set the O_LARGEFILE flag.
1787 */ 1744 */
1788asmlinkage long 1745COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
1789compat_sys_open_by_handle_at(int mountdirfd, 1746 struct file_handle __user *, handle, int, flags)
1790 struct file_handle __user *handle, int flags)
1791{ 1747{
1792 return do_handle_open(mountdirfd, handle, flags); 1748 return do_handle_open(mountdirfd, handle, flags);
1793} 1749}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e2f57a007029..3ced75f765ca 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1582,7 +1582,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1582 case FIBMAP: 1582 case FIBMAP:
1583 case FIGETBSZ: 1583 case FIGETBSZ:
1584 case FIONREAD: 1584 case FIONREAD:
1585 if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) 1585 if (S_ISREG(file_inode(f.file)->i_mode))
1586 break; 1586 break;
1587 /*FALL THROUGH*/ 1587 /*FALL THROUGH*/
1588 1588
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..7aabc6ad4e9b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1037,10 +1037,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
1037static int configfs_depend_prep(struct dentry *origin, 1037static int configfs_depend_prep(struct dentry *origin,
1038 struct config_item *target) 1038 struct config_item *target)
1039{ 1039{
1040 struct configfs_dirent *child_sd, *sd = origin->d_fsdata; 1040 struct configfs_dirent *child_sd, *sd;
1041 int ret = 0; 1041 int ret = 0;
1042 1042
1043 BUG_ON(!origin || !sd); 1043 BUG_ON(!origin || !origin->d_fsdata);
1044 sd = origin->d_fsdata;
1044 1045
1045 if (sd->s_element == target) /* Boo-yah */ 1046 if (sd->s_element == target) /* Boo-yah */
1046 goto out; 1047 goto out;
@@ -1625,7 +1626,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
1625 if (offset >= 0) 1626 if (offset >= 0)
1626 break; 1627 break;
1627 default: 1628 default:
1628 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 1629 mutex_unlock(&file_inode(file)->i_mutex);
1629 return -EINVAL; 1630 return -EINVAL;
1630 } 1631 }
1631 if (offset != file->f_pos) { 1632 if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 177493272a61..c6479658d487 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -411,7 +411,7 @@ static void wait_for_dump_helpers(struct file *file)
411{ 411{
412 struct pipe_inode_info *pipe; 412 struct pipe_inode_info *pipe;
413 413
414 pipe = file->f_path.dentry->d_inode->i_pipe; 414 pipe = file_inode(file)->i_pipe;
415 415
416 pipe_lock(pipe); 416 pipe_lock(pipe);
417 pipe->readers++; 417 pipe->readers++;
@@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo)
501 * so we dump it as root in mode 2, and only into a controlled 501 * so we dump it as root in mode 2, and only into a controlled
502 * environment (pipe handler or fully qualified path). 502 * environment (pipe handler or fully qualified path).
503 */ 503 */
504 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { 504 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
505 /* Setuid core dump mode */ 505 /* Setuid core dump mode */
506 flag = O_EXCL; /* Stop rewrite attacks */ 506 flag = O_EXCL; /* Stop rewrite attacks */
507 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 507 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
@@ -600,7 +600,7 @@ void do_coredump(siginfo_t *siginfo)
600 if (IS_ERR(cprm.file)) 600 if (IS_ERR(cprm.file))
601 goto fail_unlock; 601 goto fail_unlock;
602 602
603 inode = cprm.file->f_path.dentry->d_inode; 603 inode = file_inode(cprm.file);
604 if (inode->i_nlink > 1) 604 if (inode->i_nlink > 1)
605 goto close_fail; 605 goto close_fail;
606 if (d_unhashed(cprm.file->f_path.dentry)) 606 if (d_unhashed(cprm.file->f_path.dentry))
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index c6c3f91ecf06..3ceb9ec976e1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -351,7 +351,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
351 */ 351 */
352static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 352static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
353{ 353{
354 struct inode *inode = filp->f_path.dentry->d_inode; 354 struct inode *inode = file_inode(filp);
355 struct super_block *sb = inode->i_sb; 355 struct super_block *sb = inode->i_sb;
356 char *buf; 356 char *buf;
357 unsigned int offset; 357 unsigned int offset;
diff --git a/fs/dcache.c b/fs/dcache.c
index 19153a0a810c..fbfae008ba44 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent);
675static struct dentry *__d_find_alias(struct inode *inode, int want_discon) 675static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
676{ 676{
677 struct dentry *alias, *discon_alias; 677 struct dentry *alias, *discon_alias;
678 struct hlist_node *p;
679 678
680again: 679again:
681 discon_alias = NULL; 680 discon_alias = NULL;
682 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 681 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
683 spin_lock(&alias->d_lock); 682 spin_lock(&alias->d_lock);
684 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 683 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
685 if (IS_ROOT(alias) && 684 if (IS_ROOT(alias) &&
@@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias);
730void d_prune_aliases(struct inode *inode) 729void d_prune_aliases(struct inode *inode)
731{ 730{
732 struct dentry *dentry; 731 struct dentry *dentry;
733 struct hlist_node *p;
734restart: 732restart:
735 spin_lock(&inode->i_lock); 733 spin_lock(&inode->i_lock);
736 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 734 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
737 spin_lock(&dentry->d_lock); 735 spin_lock(&dentry->d_lock);
738 if (!dentry->d_count) { 736 if (!dentry->d_count) {
739 __dget_dlock(dentry); 737 __dget_dlock(dentry);
@@ -1358,6 +1356,7 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1358 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | 1356 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
1359 DCACHE_OP_COMPARE | 1357 DCACHE_OP_COMPARE |
1360 DCACHE_OP_REVALIDATE | 1358 DCACHE_OP_REVALIDATE |
1359 DCACHE_OP_WEAK_REVALIDATE |
1361 DCACHE_OP_DELETE )); 1360 DCACHE_OP_DELETE ));
1362 dentry->d_op = op; 1361 dentry->d_op = op;
1363 if (!op) 1362 if (!op)
@@ -1368,6 +1367,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1368 dentry->d_flags |= DCACHE_OP_COMPARE; 1367 dentry->d_flags |= DCACHE_OP_COMPARE;
1369 if (op->d_revalidate) 1368 if (op->d_revalidate)
1370 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1369 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1370 if (op->d_weak_revalidate)
1371 dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1371 if (op->d_delete) 1372 if (op->d_delete)
1372 dentry->d_flags |= DCACHE_OP_DELETE; 1373 dentry->d_flags |= DCACHE_OP_DELETE;
1373 if (op->d_prune) 1374 if (op->d_prune)
@@ -1440,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1440 int len = entry->d_name.len; 1441 int len = entry->d_name.len;
1441 const char *name = entry->d_name.name; 1442 const char *name = entry->d_name.name;
1442 unsigned int hash = entry->d_name.hash; 1443 unsigned int hash = entry->d_name.hash;
1443 struct hlist_node *p;
1444 1444
1445 if (!inode) { 1445 if (!inode) {
1446 __d_instantiate(entry, NULL); 1446 __d_instantiate(entry, NULL);
1447 return NULL; 1447 return NULL;
1448 } 1448 }
1449 1449
1450 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 1450 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
1451 /* 1451 /*
1452 * Don't need alias->d_lock here, because aliases with 1452 * Don't need alias->d_lock here, because aliases with
1453 * d_parent == entry->d_parent are not subject to name or 1453 * d_parent == entry->d_parent are not subject to name or
@@ -1672,7 +1672,6 @@ EXPORT_SYMBOL(d_splice_alias);
1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, 1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1673 struct qstr *name) 1673 struct qstr *name)
1674{ 1674{
1675 int error;
1676 struct dentry *found; 1675 struct dentry *found;
1677 struct dentry *new; 1676 struct dentry *new;
1678 1677
@@ -1681,10 +1680,12 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1681 * if not go ahead and create it now. 1680 * if not go ahead and create it now.
1682 */ 1681 */
1683 found = d_hash_and_lookup(dentry->d_parent, name); 1682 found = d_hash_and_lookup(dentry->d_parent, name);
1683 if (unlikely(IS_ERR(found)))
1684 goto err_out;
1684 if (!found) { 1685 if (!found) {
1685 new = d_alloc(dentry->d_parent, name); 1686 new = d_alloc(dentry->d_parent, name);
1686 if (!new) { 1687 if (!new) {
1687 error = -ENOMEM; 1688 found = ERR_PTR(-ENOMEM);
1688 goto err_out; 1689 goto err_out;
1689 } 1690 }
1690 1691
@@ -1725,7 +1726,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1725 1726
1726err_out: 1727err_out:
1727 iput(inode); 1728 iput(inode);
1728 return ERR_PTR(error); 1729 return found;
1729} 1730}
1730EXPORT_SYMBOL(d_add_ci); 1731EXPORT_SYMBOL(d_add_ci);
1731 1732
@@ -1889,7 +1890,7 @@ seqretry:
1889 * dentry is returned. The caller must use dput to free the entry when it has 1890 * dentry is returned. The caller must use dput to free the entry when it has
1890 * finished using it. %NULL is returned if the dentry does not exist. 1891 * finished using it. %NULL is returned if the dentry does not exist.
1891 */ 1892 */
1892struct dentry *d_lookup(struct dentry *parent, struct qstr *name) 1893struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1893{ 1894{
1894 struct dentry *dentry; 1895 struct dentry *dentry;
1895 unsigned seq; 1896 unsigned seq;
@@ -1919,7 +1920,7 @@ EXPORT_SYMBOL(d_lookup);
1919 * 1920 *
1920 * __d_lookup callers must be commented. 1921 * __d_lookup callers must be commented.
1921 */ 1922 */
1922struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) 1923struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1923{ 1924{
1924 unsigned int len = name->len; 1925 unsigned int len = name->len;
1925 unsigned int hash = name->hash; 1926 unsigned int hash = name->hash;
@@ -1997,12 +1998,10 @@ next:
1997 * @dir: Directory to search in 1998 * @dir: Directory to search in
1998 * @name: qstr of name we wish to find 1999 * @name: qstr of name we wish to find
1999 * 2000 *
2000 * On hash failure or on lookup failure NULL is returned. 2001 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2001 */ 2002 */
2002struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 2003struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2003{ 2004{
2004 struct dentry *dentry = NULL;
2005
2006 /* 2005 /*
2007 * Check for a fs-specific hash function. Note that we must 2006 * Check for a fs-specific hash function. Note that we must
2008 * calculate the standard hash first, as the d_op->d_hash() 2007 * calculate the standard hash first, as the d_op->d_hash()
@@ -2010,13 +2009,13 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2010 */ 2009 */
2011 name->hash = full_name_hash(name->name, name->len); 2010 name->hash = full_name_hash(name->name, name->len);
2012 if (dir->d_flags & DCACHE_OP_HASH) { 2011 if (dir->d_flags & DCACHE_OP_HASH) {
2013 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) 2012 int err = dir->d_op->d_hash(dir, dir->d_inode, name);
2014 goto out; 2013 if (unlikely(err < 0))
2014 return ERR_PTR(err);
2015 } 2015 }
2016 dentry = d_lookup(dir, name); 2016 return d_lookup(dir, name);
2017out:
2018 return dentry;
2019} 2017}
2018EXPORT_SYMBOL(d_hash_and_lookup);
2020 2019
2021/** 2020/**
2022 * d_validate - verify dentry provided from insecure source (deprecated) 2021 * d_validate - verify dentry provided from insecure source (deprecated)
@@ -2394,7 +2393,7 @@ out_err:
2394 */ 2393 */
2395static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2394static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2396{ 2395{
2397 struct dentry *dparent, *aparent; 2396 struct dentry *dparent;
2398 2397
2399 dentry_lock_for_move(anon, dentry); 2398 dentry_lock_for_move(anon, dentry);
2400 2399
@@ -2402,24 +2401,15 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2402 write_seqcount_begin(&anon->d_seq); 2401 write_seqcount_begin(&anon->d_seq);
2403 2402
2404 dparent = dentry->d_parent; 2403 dparent = dentry->d_parent;
2405 aparent = anon->d_parent;
2406 2404
2407 switch_names(dentry, anon); 2405 switch_names(dentry, anon);
2408 swap(dentry->d_name.hash, anon->d_name.hash); 2406 swap(dentry->d_name.hash, anon->d_name.hash);
2409 2407
2410 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2408 dentry->d_parent = dentry;
2411 list_del(&dentry->d_u.d_child); 2409 list_del_init(&dentry->d_u.d_child);
2412 if (!IS_ROOT(dentry)) 2410 anon->d_parent = dparent;
2413 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2414 else
2415 INIT_LIST_HEAD(&dentry->d_u.d_child);
2416
2417 anon->d_parent = (dparent == dentry) ? anon : dparent;
2418 list_del(&anon->d_u.d_child); 2411 list_del(&anon->d_u.d_child);
2419 if (!IS_ROOT(anon)) 2412 list_add(&anon->d_u.d_child, &dparent->d_subdirs);
2420 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2421 else
2422 INIT_LIST_HEAD(&anon->d_u.d_child);
2423 2413
2424 write_seqcount_end(&dentry->d_seq); 2414 write_seqcount_end(&dentry->d_seq);
2425 write_seqcount_end(&anon->d_seq); 2415 write_seqcount_end(&anon->d_seq);
@@ -2722,37 +2712,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
2722} 2712}
2723EXPORT_SYMBOL(d_path); 2713EXPORT_SYMBOL(d_path);
2724 2714
2725/**
2726 * d_path_with_unreachable - return the path of a dentry
2727 * @path: path to report
2728 * @buf: buffer to return value in
2729 * @buflen: buffer length
2730 *
2731 * The difference from d_path() is that this prepends "(unreachable)"
2732 * to paths which are unreachable from the current process' root.
2733 */
2734char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2735{
2736 char *res = buf + buflen;
2737 struct path root;
2738 int error;
2739
2740 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2741 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2742
2743 get_fs_root(current->fs, &root);
2744 write_seqlock(&rename_lock);
2745 error = path_with_deleted(path, &root, &res, &buflen);
2746 if (error > 0)
2747 error = prepend_unreachable(&res, &buflen);
2748 write_sequnlock(&rename_lock);
2749 path_put(&root);
2750 if (error)
2751 res = ERR_PTR(error);
2752
2753 return res;
2754}
2755
2756/* 2715/*
2757 * Helper function for dentry_operations.d_dname() members 2716 * Helper function for dentry_operations.d_dname() members
2758 */ 2717 */
@@ -3035,7 +2994,7 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
3035 ino_t ino = 0; 2994 ino_t ino = 0;
3036 2995
3037 dentry = d_hash_and_lookup(dir, name); 2996 dentry = d_hash_and_lookup(dir, name);
3038 if (dentry) { 2997 if (!IS_ERR_OR_NULL(dentry)) {
3039 if (dentry->d_inode) 2998 if (dentry->d_inode)
3040 ino = dentry->d_inode->i_ino; 2999 ino = dentry->d_inode->i_ino;
3041 dput(dentry); 3000 dput(dentry);
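
A minimal sketch of the hlist_for_each_entry() form that the dcache and cifs hunks above migrate to, assuming the three-argument iterator from <linux/list.h>; struct example_node, example_sum() and the link member are illustrative.

#include <linux/list.h>

struct example_node {
	int value;
	struct hlist_node link;
};

/* The iterator now takes only the cursor, the head and the member name,
 * so the extra 'struct hlist_node *p' locals disappear. */
static int example_sum(struct hlist_head *head)
{
	struct example_node *n;
	int sum = 0;

	hlist_for_each_entry(n, head, link)
		sum += n->value;
	return sum;
}
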
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 153bb1e42e63..0c4f80b447fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -176,7 +176,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
176 opts->uid = uid; 176 opts->uid = uid;
177 break; 177 break;
178 case Opt_gid: 178 case Opt_gid:
179 if (match_octal(&args[0], &option)) 179 if (match_int(&args[0], &option))
180 return -EINVAL; 180 return -EINVAL;
181 gid = make_kgid(current_user_ns(), option); 181 gid = make_kgid(current_user_ns(), option);
182 if (!gid_valid(gid)) 182 if (!gid_valid(gid))
@@ -322,7 +322,6 @@ static struct dentry *__create_file(const char *name, umode_t mode,
322 if (!parent) 322 if (!parent)
323 parent = debugfs_mount->mnt_root; 323 parent = debugfs_mount->mnt_root;
324 324
325 dentry = NULL;
326 mutex_lock(&parent->d_inode->i_mutex); 325 mutex_lock(&parent->d_inode->i_mutex);
327 dentry = lookup_one_len(name, parent, strlen(name)); 326 dentry = lookup_one_len(name, parent, strlen(name));
328 if (!IS_ERR(dentry)) { 327 if (!IS_ERR(dentry)) {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 472e6befc54d..073d30b9d1ac 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -243,6 +243,13 @@ static int mknod_ptmx(struct super_block *sb)
243 struct dentry *root = sb->s_root; 243 struct dentry *root = sb->s_root;
244 struct pts_fs_info *fsi = DEVPTS_SB(sb); 244 struct pts_fs_info *fsi = DEVPTS_SB(sb);
245 struct pts_mount_opts *opts = &fsi->mount_opts; 245 struct pts_mount_opts *opts = &fsi->mount_opts;
246 kuid_t root_uid;
247 kgid_t root_gid;
248
249 root_uid = make_kuid(current_user_ns(), 0);
250 root_gid = make_kgid(current_user_ns(), 0);
251 if (!uid_valid(root_uid) || !gid_valid(root_gid))
252 return -EINVAL;
246 253
247 mutex_lock(&root->d_inode->i_mutex); 254 mutex_lock(&root->d_inode->i_mutex);
248 255
@@ -273,6 +280,8 @@ static int mknod_ptmx(struct super_block *sb)
273 280
274 mode = S_IFCHR|opts->ptmxmode; 281 mode = S_IFCHR|opts->ptmxmode;
275 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); 282 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
283 inode->i_uid = root_uid;
284 inode->i_gid = root_gid;
276 285
277 d_add(dentry, inode); 286 d_add(dentry, inode);
278 287
@@ -438,6 +447,12 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
438 if (error) 447 if (error)
439 return ERR_PTR(error); 448 return ERR_PTR(error);
440 449
450 /* Require newinstance for all user namespace mounts to ensure
451 * the mount options are not changed.
452 */
453 if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
454 return ERR_PTR(-EINVAL);
455
441 if (opts.newinstance) 456 if (opts.newinstance)
442 s = sget(fs_type, NULL, set_anon_super, flags, NULL); 457 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
443 else 458 else
@@ -491,6 +506,9 @@ static struct file_system_type devpts_fs_type = {
491 .name = "devpts", 506 .name = "devpts",
492 .mount = devpts_mount, 507 .mount = devpts_mount,
493 .kill_sb = devpts_kill_sb, 508 .kill_sb = devpts_kill_sb,
509#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
510 .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
511#endif
494}; 512};
495 513
496/* 514/*
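
A minimal sketch of the guard added to mknod_ptmx() above, assuming make_kuid()/make_kgid() from <linux/uidgid.h> and current_user_ns() from <linux/cred.h>; example_resolve_root() is an illustrative name.

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Resolve uid/gid 0 in the mounting user namespace and refuse to create
 * the ptmx node when that namespace has no mapping for root. */
static int example_resolve_root(kuid_t *uid, kgid_t *gid)
{
	kuid_t root_uid = make_kuid(current_user_ns(), 0);
	kgid_t root_gid = make_kgid(current_user_ns(), 0);

	if (!uid_valid(root_uid) || !gid_valid(root_gid))
		return -EINVAL;

	*uid = root_uid;
	*gid = root_gid;
	return 0;
}
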
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cf5b44b10c67..f853263cf74f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -261,9 +261,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
261 dio->end_io(dio->iocb, offset, transferred, 261 dio->end_io(dio->iocb, offset, transferred,
262 dio->private, ret, is_async); 262 dio->private, ret, is_async);
263 } else { 263 } else {
264 inode_dio_done(dio->inode);
264 if (is_async) 265 if (is_async)
265 aio_complete(dio->iocb, ret, 0); 266 aio_complete(dio->iocb, ret, 0);
266 inode_dio_done(dio->inode);
267 } 267 }
268 268
269 return ret; 269 return ret;
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index a0387dd8b1f0..7d58d5b112b5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -158,7 +158,7 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
158 unsigned int x; 158 unsigned int x;
159 159
160 if (!capable(CAP_SYS_ADMIN)) 160 if (!capable(CAP_SYS_ADMIN))
161 return -EACCES; 161 return -EPERM;
162 162
163 x = simple_strtoul(buf, NULL, 0); 163 x = simple_strtoul(buf, NULL, 0);
164 164
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 77c0f70f8fe8..e7665c31f7b1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -96,10 +96,13 @@ do { \
96} 96}
97 97
98 98
99#define DLM_RTF_SHRINK 0x00000001
100
99struct dlm_rsbtable { 101struct dlm_rsbtable {
100 struct rb_root keep; 102 struct rb_root keep;
101 struct rb_root toss; 103 struct rb_root toss;
102 spinlock_t lock; 104 spinlock_t lock;
105 uint32_t flags;
103}; 106};
104 107
105 108
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a579f30f237d..1b1146670c4b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1132,6 +1132,7 @@ static void toss_rsb(struct kref *kref)
1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1134 r->res_toss_time = jiffies; 1134 r->res_toss_time = jiffies;
1135 ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1135 if (r->res_lvbptr) { 1136 if (r->res_lvbptr) {
1136 dlm_free_lvb(r->res_lvbptr); 1137 dlm_free_lvb(r->res_lvbptr);
1137 r->res_lvbptr = NULL; 1138 r->res_lvbptr = NULL;
@@ -1182,7 +1183,7 @@ static void detach_lkb(struct dlm_lkb *lkb)
1182static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 1183static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
1183{ 1184{
1184 struct dlm_lkb *lkb; 1185 struct dlm_lkb *lkb;
1185 int rv, id; 1186 int rv;
1186 1187
1187 lkb = dlm_allocate_lkb(ls); 1188 lkb = dlm_allocate_lkb(ls);
1188 if (!lkb) 1189 if (!lkb)
@@ -1198,19 +1199,13 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
1198 mutex_init(&lkb->lkb_cb_mutex); 1199 mutex_init(&lkb->lkb_cb_mutex);
1199 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); 1200 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
1200 1201
1201 retry: 1202 idr_preload(GFP_NOFS);
1202 rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
1203 if (!rv)
1204 return -ENOMEM;
1205
1206 spin_lock(&ls->ls_lkbidr_spin); 1203 spin_lock(&ls->ls_lkbidr_spin);
1207 rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); 1204 rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
1208 if (!rv) 1205 if (rv >= 0)
1209 lkb->lkb_id = id; 1206 lkb->lkb_id = rv;
1210 spin_unlock(&ls->ls_lkbidr_spin); 1207 spin_unlock(&ls->ls_lkbidr_spin);
1211 1208 idr_preload_end();
1212 if (rv == -EAGAIN)
1213 goto retry;
1214 1209
1215 if (rv < 0) { 1210 if (rv < 0) {
1216 log_error(ls, "create_lkb idr error %d", rv); 1211 log_error(ls, "create_lkb idr error %d", rv);
@@ -1659,11 +1654,18 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1659 char *name; 1654 char *name;
1660 int our_nodeid = dlm_our_nodeid(); 1655 int our_nodeid = dlm_our_nodeid();
1661 int remote_count = 0; 1656 int remote_count = 0;
1657 int need_shrink = 0;
1662 int i, len, rv; 1658 int i, len, rv;
1663 1659
1664 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); 1660 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1665 1661
1666 spin_lock(&ls->ls_rsbtbl[b].lock); 1662 spin_lock(&ls->ls_rsbtbl[b].lock);
1663
1664 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1665 spin_unlock(&ls->ls_rsbtbl[b].lock);
1666 return;
1667 }
1668
1667 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { 1669 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1668 next = rb_next(n); 1670 next = rb_next(n);
1669 r = rb_entry(n, struct dlm_rsb, res_hashnode); 1671 r = rb_entry(n, struct dlm_rsb, res_hashnode);
@@ -1679,6 +1681,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1679 continue; 1681 continue;
1680 } 1682 }
1681 1683
1684 need_shrink = 1;
1685
1682 if (!time_after_eq(jiffies, r->res_toss_time + 1686 if (!time_after_eq(jiffies, r->res_toss_time +
1683 dlm_config.ci_toss_secs * HZ)) { 1687 dlm_config.ci_toss_secs * HZ)) {
1684 continue; 1688 continue;
@@ -1710,6 +1714,11 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1710 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1714 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1711 dlm_free_rsb(r); 1715 dlm_free_rsb(r);
1712 } 1716 }
1717
1718 if (need_shrink)
1719 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1720 else
1721 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1713 spin_unlock(&ls->ls_rsbtbl[b].lock); 1722 spin_unlock(&ls->ls_rsbtbl[b].lock);
1714 1723
1715 /* 1724 /*
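
A minimal sketch of the idr_preload()/idr_alloc() pattern that create_lkb() and recover_idr_add() adopt above, assuming the post-idr_pre_get() API from <linux/idr.h>; example_assign_id() and its lock/idr parameters are illustrative.

#include <linux/idr.h>
#include <linux/spinlock.h>

/* Preload outside the spinlock, then allocate an id starting at 1 with
 * GFP_NOWAIT while the lock is held; the old idr_pre_get()/
 * idr_get_new_above() retry loop is no longer needed. */
static int example_assign_id(struct idr *idr, spinlock_t *lock, void *object)
{
	int id;

	idr_preload(GFP_NOFS);
	spin_lock(lock);
	id = idr_alloc(idr, object, 1, 0, GFP_NOWAIT);
	spin_unlock(lock);
	idr_preload_end();

	return id;	/* >= 1 on success, negative errno on failure */
}
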
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 2e99fb0c9737..3ca79d3253b9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -796,7 +796,6 @@ static int release_lockspace(struct dlm_ls *ls, int force)
796 */ 796 */
797 797
798 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); 798 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
799 idr_remove_all(&ls->ls_lkbidr);
800 idr_destroy(&ls->ls_lkbidr); 799 idr_destroy(&ls->ls_lkbidr);
801 800
802 /* 801 /*
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index dd87a31bcc21..4f5ad246582f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid)
 static struct connection *__find_con(int nodeid)
 {
 	int r;
-	struct hlist_node *h;
 	struct connection *con;
 
 	r = nodeid_hash(nodeid);
 
-	hlist_for_each_entry(con, h, &connection_hash[r], list) {
+	hlist_for_each_entry(con, &connection_hash[r], list) {
 		if (con->nodeid == nodeid)
 			return con;
 	}
@@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
 static void foreach_conn(void (*conn_func)(struct connection *c))
 {
 	int i;
-	struct hlist_node *h, *n;
+	struct hlist_node *n;
 	struct connection *con;
 
 	for (i = 0; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
+		hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
 			conn_func(con);
-		}
 	}
 }
 
@@ -257,13 +255,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 static struct connection *assoc2con(int assoc_id)
 {
 	int i;
-	struct hlist_node *h;
 	struct connection *con;
 
 	mutex_lock(&connections_lock);
 
 	for (i = 0 ; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry(con, h, &connection_hash[i], list) {
+		hlist_for_each_entry(con, &connection_hash[i], list) {
 			if (con->sctp_assoc == assoc_id) {
 				mutex_unlock(&connections_lock);
 				return con;
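The lowcomms.c hunks above reflect the hash-list iterator rework in this series: hlist_for_each_entry() and friends now use the entry pointer itself as the cursor, so the extra struct hlist_node * variable disappears (the _safe variant keeps only the struct hlist_node *n lookahead). A minimal sketch of the new calling convention; struct conn, bucket and find_conn() are made-up names for illustration, not DLM code:

#include <linux/list.h>

struct conn {
	int nodeid;
	struct hlist_node list;		/* linkage into one hash bucket */
};

static HLIST_HEAD(bucket);		/* a single bucket, for illustration */

static struct conn *find_conn(int nodeid)
{
	struct conn *c;

	/* new style: no separate struct hlist_node *pos cursor */
	hlist_for_each_entry(c, &bucket, list) {
		if (c->nodeid == nodeid)
			return c;
	}
	return NULL;
}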
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index aedea28a86a1..a6bc63f6e31b 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -305,27 +305,26 @@ static int recover_idr_empty(struct dlm_ls *ls)
 static int recover_idr_add(struct dlm_rsb *r)
 {
 	struct dlm_ls *ls = r->res_ls;
-	int rv, id;
-
-	rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS);
-	if (!rv)
-		return -ENOMEM;
+	int rv;
 
+	idr_preload(GFP_NOFS);
 	spin_lock(&ls->ls_recover_idr_lock);
 	if (r->res_id) {
-		spin_unlock(&ls->ls_recover_idr_lock);
-		return -1;
-	}
-	rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id);
-	if (rv) {
-		spin_unlock(&ls->ls_recover_idr_lock);
-		return rv;
+		rv = -1;
+		goto out_unlock;
 	}
-	r->res_id = id;
+	rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
+	if (rv < 0)
+		goto out_unlock;
+
+	r->res_id = rv;
 	ls->ls_recover_list_count++;
 	dlm_hold_rsb(r);
+	rv = 0;
+out_unlock:
 	spin_unlock(&ls->ls_recover_idr_lock);
-	return 0;
+	idr_preload_end();
+	return rv;
 }
 
 static void recover_idr_del(struct dlm_rsb *r)
@@ -351,24 +350,21 @@ static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
 	return r;
 }
 
-static int recover_idr_clear_rsb(int id, void *p, void *data)
+static void recover_idr_clear(struct dlm_ls *ls)
 {
-	struct dlm_ls *ls = data;
-	struct dlm_rsb *r = p;
+	struct dlm_rsb *r;
+	int id;
 
-	r->res_id = 0;
-	r->res_recover_locks_count = 0;
-	ls->ls_recover_list_count--;
+	spin_lock(&ls->ls_recover_idr_lock);
 
-	dlm_put_rsb(r);
-	return 0;
-}
+	idr_for_each_entry(&ls->ls_recover_idr, r, id) {
+		idr_remove(&ls->ls_recover_idr, id);
+		r->res_id = 0;
+		r->res_recover_locks_count = 0;
+		ls->ls_recover_list_count--;
 
-static void recover_idr_clear(struct dlm_ls *ls)
-{
-	spin_lock(&ls->ls_recover_idr_lock);
-	idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls);
-	idr_remove_all(&ls->ls_recover_idr);
+		dlm_put_rsb(r);
+	}
 
 	if (ls->ls_recover_list_count != 0) {
 		log_error(ls, "warning: recover_list_count %d",
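The recover_idr_add() rewrite above is the standard conversion from the old two-step idr_pre_get()/idr_get_new_above() API to idr_preload()/idr_alloc(): memory is preallocated outside the spinlock, then the id is allocated atomically under it, and idr_alloc() returns the new id (or a negative errno) directly. A minimal sketch of that pattern; my_idr, my_lock and my_idr_add() are made-up names:

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_lock);

static int my_idr_add(void *obj)
{
	int id;

	idr_preload(GFP_NOFS);			/* preallocate outside the lock */
	spin_lock(&my_lock);
	/* ids start at 1; an upper bound of 0 means "no limit" */
	id = idr_alloc(&my_idr, obj, 1, 0, GFP_NOWAIT);
	spin_unlock(&my_lock);
	idr_preload_end();

	return id < 0 ? id : 0;			/* negative errno on failure */
}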
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 #endif
 		return -EINVAL;
 
-#ifdef CONFIG_COMPAT
-	if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
-#else
+	/*
+	 * can't compare against COMPAT/dlm_write_request32 because
+	 * we don't yet know if is64bit is zero
+	 */
 	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
-#endif
 		return -EINVAL;
 
 	kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index cc16562654de..e15ef38c24fa 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
 config ECRYPT_FS
-	tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
+	tristate "eCrypt filesystem layer support"
+	depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_MD5
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index cfb4b9fed520..7e2c6f5d7985 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -509,6 +509,12 @@ ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
 	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt;
 }
 
+static inline struct path *
+ecryptfs_dentry_to_lower_path(struct dentry *dentry)
+{
+	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
+}
+
 static inline void
 ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
 {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d45ba4568128..53acc9d0c138 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -118,7 +118,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 
 	lower_file = ecryptfs_file_to_lower(file);
 	lower_file->f_pos = file->f_pos;
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 	memset(&buf, 0, sizeof(buf));
 	buf.dirent = dirent;
 	buf.dentry = file->f_path.dentry;
@@ -133,7 +133,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 		goto out;
 	if (rc >= 0)
 		fsstack_copy_attr_atime(inode,
-					lower_file->f_path.dentry->d_inode);
+					file_inode(lower_file));
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index cc7709e7c508..e0f07fb6d56b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1027,8 +1027,7 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct kstat lower_stat;
 	int rc;
 
-	rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
-			 ecryptfs_dentry_to_lower(dentry), &lower_stat);
+	rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
 	if (!rc) {
 		fsstack_copy_attr_all(dentry->d_inode,
 				      ecryptfs_inode_to_lower(dentry->d_inode));
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 5fa2471796c2..8d7a577ae497 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
  */
 int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
 {
-	struct hlist_node *elem;
 	int rc;
 
-	hlist_for_each_entry(*daemon, elem,
+	hlist_for_each_entry(*daemon,
 			    &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
 			    euid_chain) {
 		if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
@@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void)
 		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
 	}
 	if (ecryptfs_daemon_hash) {
-		struct hlist_node *elem;
 		struct ecryptfs_daemon *daemon;
 		int i;
 
@@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void)
 		for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
 			int rc;
 
-			hlist_for_each_entry(daemon, elem,
+			hlist_for_each_entry(daemon,
 					     &ecryptfs_daemon_hash[i],
 					     euid_chain) {
 				rc = ecryptfs_exorcise_daemon(daemon);
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index b2a34a192f4f..6a160539cd23 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -40,16 +40,12 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
 			 loff_t offset, size_t size)
 {
 	struct file *lower_file;
-	mm_segment_t fs_save;
 	ssize_t rc;
 
 	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
 	if (!lower_file)
 		return -EIO;
-	fs_save = get_fs();
-	set_fs(get_ds());
-	rc = vfs_write(lower_file, data, size, &offset);
-	set_fs(fs_save);
+	rc = kernel_write(lower_file, data, size, offset);
 	mark_inode_dirty_sync(ecryptfs_inode);
 	return rc;
 }
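The ecryptfs_write_lower() change above drops the get_fs()/set_fs(get_ds())/vfs_write() address-limit dance in favour of kernel_write(), which takes a kernel buffer and an offset directly. A minimal sketch of a caller, mirroring the kernel_write(file, buf, count, pos) call in the hunk; write_at() is a made-up name, not an eCryptfs helper:

#include <linux/fs.h>

/* Sketch: write a kernel buffer to an already-open file at a given offset. */
static ssize_t write_at(struct file *file, const char *buf, size_t count,
			loff_t pos)
{
	return kernel_write(file, buf, count, pos);	/* no set_fs() juggling needed */
}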
diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig
index 6ebfc1c207a8..d020e3c30fea 100644
--- a/fs/efs/Kconfig
+++ b/fs/efs/Kconfig
@@ -1,6 +1,6 @@
 config EFS_FS
-	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on BLOCK && EXPERIMENTAL
+	tristate "EFS file system support (read only)"
+	depends on BLOCK
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 7ee6f7e3a608..055a9e9ca747 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -20,7 +20,7 @@ const struct inode_operations efs_dir_inode_operations = {
 };
 
 static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct buffer_head *bh;
 
 	struct efs_dir *dirblock;
diff --git a/fs/exec.c b/fs/exec.c
index 18c45cac368f..a96a4885bbbf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -123,7 +123,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 		goto out;
 
 	error = -EINVAL;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	error = -EACCES;
@@ -355,7 +355,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
  * flags, permissions, and offset, so we use temporary values.  We'll update
  * them later in setup_arg_pages().
  */
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
 {
 	int err;
 	struct mm_struct *mm = NULL;
@@ -434,8 +434,9 @@ static int count(struct user_arg_ptr argv, int max)
 			if (IS_ERR(p))
 				return -EFAULT;
 
-			if (i++ >= max)
+			if (i >= max)
 				return -E2BIG;
+			++i;
 
 			if (fatal_signal_pending(current))
 				return -ERESTARTNOHAND;
@@ -763,7 +764,7 @@ struct file *open_exec(const char *name)
 		goto out;
 
 	err = -EACCES;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
@@ -1097,7 +1098,7 @@ EXPORT_SYMBOL(flush_old_exec);
 
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
-	if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+	if (inode_permission(file_inode(file), MAY_READ) < 0)
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 }
 EXPORT_SYMBOL(would_dump);
@@ -1110,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
 	if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
-		set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
+		set_dumpable(current->mm, SUID_DUMP_USER);
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
@@ -1269,7 +1270,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
 int prepare_binprm(struct linux_binprm *bprm)
 {
 	umode_t mode;
-	struct inode * inode = bprm->file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(bprm->file);
 	int retval;
 
 	mode = inode->i_mode;
@@ -1638,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt);
 void set_dumpable(struct mm_struct *mm, int value)
 {
 	switch (value) {
-	case SUID_DUMPABLE_DISABLED:
+	case SUID_DUMP_DISABLE:
 		clear_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_ENABLED:
+	case SUID_DUMP_USER:
 		set_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_SAFE:
+	case SUID_DUMP_ROOT:
 		set_bit(MMF_DUMP_SECURELY, &mm->flags);
 		smp_wmb();
 		set_bit(MMF_DUMPABLE, &mm->flags);
@@ -1661,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags)
 	int ret;
 
 	ret = mm_flags & MMF_DUMPABLE_MASK;
-	return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret;
+	return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
 int get_dumpable(struct mm_struct *mm)
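Most of the mechanical churn in this series, here and in the *_readdir() and ioctl hunks below, replaces open-coded filp->f_path.dentry->d_inode (or filp->f_dentry->d_inode) chains with the file_inode() helper. A small sketch of the intended usage; is_regular_open_file() is illustrative, not a kernel function:

#include <linux/fs.h>

static bool is_regular_open_file(struct file *filp)
{
	struct inode *inode = file_inode(filp);	/* was filp->f_path.dentry->d_inode */

	return S_ISREG(inode->i_mode);
}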
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index c61e62ac231c..46375896cfc0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -242,7 +242,7 @@ static int
 exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
 	unsigned long npages = dir_pages(inode);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 5df4bb4aab14..262fc9940982 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result,
 {
 	struct dentry *dentry, *toput = NULL;
 	struct inode *inode;
-	struct hlist_node *p;
 
 	if (acceptable(context, result))
 		return result;
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 2616d0ea5c5c..9f9992b37924 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -159,15 +159,6 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
 	return bh;
 }
 
-static void release_blocks(struct super_block *sb, int count)
-{
-	if (count) {
-		struct ext2_sb_info *sbi = EXT2_SB(sb);
-
-		percpu_counter_add(&sbi->s_freeblocks_counter, count);
-	}
-}
-
 static void group_adjust_blocks(struct super_block *sb, int group_no,
 	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
 {
@@ -568,8 +559,11 @@ do_more:
 	}
 error_return:
 	brelse(bitmap_bh);
-	release_blocks(sb, freed);
-	dquot_free_block_nodirty(inode, freed);
+	if (freed) {
+		percpu_counter_add(&sbi->s_freeblocks_counter, freed);
+		dquot_free_block_nodirty(inode, freed);
+		mark_inode_dirty(inode);
+	}
 }
 
 /**
@@ -1239,10 +1233,6 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
 
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
-	if (!sb) {
-		printk("ext2_new_blocks: nonexistent device");
-		return 0;
-	}
 
 	/*
 	 * Check quota for allocation of this block.
@@ -1416,9 +1406,11 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	dquot_free_block_nodirty(inode, *count-num);
-	mark_inode_dirty(inode);
-	*count = num;
+	if (num < *count) {
+		dquot_free_block_nodirty(inode, *count-num);
+		mark_inode_dirty(inode);
+		*count = num;
+	}
 	return ret_block;
 
 io_error:
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0f4f5c929257..4237722bfd27 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -290,7 +290,7 @@ static int
 ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6363ac66fafa..c3881e56662e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -495,6 +495,10 @@ static int ext2_alloc_branch(struct inode *inode,
 	 * parent to disk.
 	 */
 	bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+	if (unlikely(!bh)) {
+		err = -ENOMEM;
+		goto failed;
+	}
 	branch[n].bh = bh;
 	lock_buffer(bh);
 	memset(bh->b_data, 0, blocksize);
@@ -523,6 +527,14 @@ static int ext2_alloc_branch(struct inode *inode,
 	}
 	*blks = num;
 	return err;
+
+failed:
+	for (i = 1; i < n; i++)
+		bforget(branch[i].bh);
+	for (i = 0; i < indirect_blks; i++)
+		ext2_free_blocks(inode, new_blocks[i], 1);
+	ext2_free_blocks(inode, new_blocks[i], num);
+	return err;
 }
 
 /**
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 2de655f5d625..5d46c09863f0 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -19,7 +19,7 @@
 
 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index fa04d023177e..7f68c8114026 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1500,7 +1500,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 		bh = sb_bread(sb, tmp_bh.b_blocknr);
 	else
 		bh = sb_getblk(sb, tmp_bh.b_blocknr);
-	if (!bh) {
+	if (unlikely(!bh)) {
 		err = -EIO;
 		goto out;
 	}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index b6754dbbce3c..2d7557db3ae8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -662,10 +662,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		ea_idebug(inode, "creating block %d", block);
 
 		new_bh = sb_getblk(sb, block);
-		if (!new_bh) {
+		if (unlikely(!new_bh)) {
 			ext2_free_blocks(inode, block, 1);
 			mark_inode_dirty(inode);
-			error = -EIO;
+			error = -ENOMEM;
 			goto cleanup;
 		}
 		lock_buffer(new_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index dd91264ba94f..87eccbbca255 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -99,7 +99,7 @@ static int ext3_readdir(struct file * filp,
 	int i, stored;
 	struct ext3_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -114,7 +114,7 @@ static int ext3_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL;
 	}
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -457,7 +457,7 @@ static int call_filldir(struct file * filp, void * dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block * sb;
 	int error;
 
@@ -487,7 +487,7 @@ static int ext3_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int ret;
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b176d4253544..d512c4bc4ad7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -676,6 +676,10 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 		 * parent to disk.
 		 */
 		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -ENOMEM;
+			goto failed;
+		}
 		branch[n].bh = bh;
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
@@ -717,7 +721,7 @@ failed:
 		BUFFER_TRACE(branch[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, branch[i].bh);
 	}
-	for (i = 0; i <indirect_blks; i++)
+	for (i = 0; i < indirect_blks; i++)
 		ext3_free_blocks(handle, inode, new_blocks[i], 1);
 
 	ext3_free_blocks(handle, inode, new_blocks[i], num);
@@ -1078,8 +1082,8 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 	if (!err && buffer_mapped(&dummy)) {
 		struct buffer_head *bh;
 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-		if (!bh) {
-			*errp = -EIO;
+		if (unlikely(!bh)) {
+			*errp = -ENOMEM;
 			goto err;
 		}
 		if (buffer_new(&dummy)) {
@@ -2729,12 +2733,12 @@ static int __ext3_get_inode_loc(struct inode *inode,
 		return -EIO;
 
 	bh = sb_getblk(inode->i_sb, block);
-	if (!bh) {
+	if (unlikely(!bh)) {
 		ext3_error (inode->i_sb, "ext3_get_inode_loc",
 				"unable to read inode block - "
 				"inode=%lu, block="E3FSBLK,
 				inode->i_ino, block);
-		return -EIO;
+		return -ENOMEM;
 	}
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
@@ -2783,7 +2787,7 @@ static int __ext3_get_inode_loc(struct inode *inode,
 
 			bitmap_bh = sb_getblk(inode->i_sb,
 					le32_to_cpu(desc->bg_inode_bitmap));
-			if (!bitmap_bh)
+			if (unlikely(!bitmap_bh))
 				goto make_io;
 
 			/*
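A recurring pattern in the ext2/ext3 hunks of this series: a NULL return from sb_getblk() is now treated as an allocation failure, so callers test it (often with unlikely()) and return -ENOMEM rather than -EIO. A minimal sketch of that calling convention; read_meta_block() is a made-up name:

#include <linux/buffer_head.h>

/* Sketch: fetch one metadata block or report memory pressure. */
static int read_meta_block(struct super_block *sb, sector_t blk,
			   struct buffer_head **out)
{
	struct buffer_head *bh;

	bh = sb_getblk(sb, blk);
	if (unlikely(!bh))
		return -ENOMEM;	/* NULL here means allocation failure, not I/O error */

	*out = bh;
	return 0;
}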
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 677a5c27dc69..4d96e9a64532 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -14,7 +14,7 @@
 
 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext3_inode_info *ei = EXT3_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 890b8947c546..692de13e3596 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,7 +36,6 @@
 #define NAMEI_RA_CHUNKS  2
 #define NAMEI_RA_BLOCKS  4
 #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
 
 static struct buffer_head *ext3_append(handle_t *handle,
 					struct inode *inode,
@@ -624,7 +623,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
-	dir = dir_file->f_path.dentry->d_inode;
+	dir = file_inode(dir_file);
 	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
 		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
 		if (hinfo.hash_version <= DX_HASH_TEA)
@@ -638,7 +637,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+	frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err);
 	if (!frame)
 		return err;
 
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0f814f3450de..27105655502c 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -116,8 +116,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	int err;
 
 	bh = sb_getblk(sb, blk);
-	if (!bh)
-		return ERR_PTR(-EIO);
+	if (unlikely(!bh))
+		return ERR_PTR(-ENOMEM);
 	if ((err = ext3_journal_get_write_access(handle, bh))) {
 		brelse(bh);
 		bh = ERR_PTR(err);
@@ -234,8 +234,8 @@ static int setup_new_group_blocks(struct super_block *sb,
 			goto exit_bh;
 
 		gdb = sb_getblk(sb, block);
-		if (!gdb) {
-			err = -EIO;
+		if (unlikely(!gdb)) {
+			err = -ENOMEM;
 			goto exit_bh;
 		}
 		if ((err = ext3_journal_get_write_access(handle, gdb))) {
@@ -722,8 +722,8 @@ static void update_backups(struct super_block *sb,
 			break;
 
 		bh = sb_getblk(sb, group * bpg + blk_off);
-		if (!bh) {
-			err = -EIO;
+		if (unlikely(!bh)) {
+			err = -ENOMEM;
 			break;
 		}
 		ext3_debug("update metadata backup %#04lx\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e50223b3299..5546ca225ffe 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -916,21 +916,24 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
916 "Not enough memory for storing quotafile name"); 916 "Not enough memory for storing quotafile name");
917 return 0; 917 return 0;
918 } 918 }
919 if (sbi->s_qf_names[qtype] && 919 if (sbi->s_qf_names[qtype]) {
920 strcmp(sbi->s_qf_names[qtype], qname)) { 920 int same = !strcmp(sbi->s_qf_names[qtype], qname);
921 ext3_msg(sb, KERN_ERR, 921
922 "%s quota file already specified", QTYPE2NAME(qtype));
923 kfree(qname); 922 kfree(qname);
924 return 0; 923 if (!same) {
924 ext3_msg(sb, KERN_ERR,
925 "%s quota file already specified",
926 QTYPE2NAME(qtype));
927 }
928 return same;
925 } 929 }
926 sbi->s_qf_names[qtype] = qname; 930 if (strchr(qname, '/')) {
927 if (strchr(sbi->s_qf_names[qtype], '/')) {
928 ext3_msg(sb, KERN_ERR, 931 ext3_msg(sb, KERN_ERR,
929 "quotafile must be on filesystem root"); 932 "quotafile must be on filesystem root");
930 kfree(sbi->s_qf_names[qtype]); 933 kfree(qname);
931 sbi->s_qf_names[qtype] = NULL;
932 return 0; 934 return 0;
933 } 935 }
936 sbi->s_qf_names[qtype] = qname;
934 set_opt(sbi->s_mount_opt, QUOTA); 937 set_opt(sbi->s_mount_opt, QUOTA);
935 return 1; 938 return 1;
936} 939}
@@ -945,11 +948,10 @@ static int clear_qf_name(struct super_block *sb, int qtype) {
945 " when quota turned on"); 948 " when quota turned on");
946 return 0; 949 return 0;
947 } 950 }
948 /* 951 if (sbi->s_qf_names[qtype]) {
949 * The space will be released later when all options are confirmed 952 kfree(sbi->s_qf_names[qtype]);
950 * to be correct 953 sbi->s_qf_names[qtype] = NULL;
951 */ 954 }
952 sbi->s_qf_names[qtype] = NULL;
953 return 1; 955 return 1;
954} 956}
955#endif 957#endif
@@ -2065,6 +2067,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2067 "writeback"); 2069 "writeback");
2070 sb->s_flags |= MS_SNAP_STABLE;
2068 2071
2069 return 0; 2072 return 0;
2070 2073
@@ -2605,7 +2608,18 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2605#ifdef CONFIG_QUOTA 2608#ifdef CONFIG_QUOTA
2606 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2609 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2607 for (i = 0; i < MAXQUOTAS; i++) 2610 for (i = 0; i < MAXQUOTAS; i++)
2608 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2611 if (sbi->s_qf_names[i]) {
2612 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
2613 GFP_KERNEL);
2614 if (!old_opts.s_qf_names[i]) {
2615 int j;
2616
2617 for (j = 0; j < i; j++)
2618 kfree(old_opts.s_qf_names[j]);
2619 return -ENOMEM;
2620 }
2621 } else
2622 old_opts.s_qf_names[i] = NULL;
2609#endif 2623#endif
2610 2624
2611 /* 2625 /*
@@ -2698,9 +2712,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2698#ifdef CONFIG_QUOTA 2712#ifdef CONFIG_QUOTA
2699 /* Release old quota file names */ 2713 /* Release old quota file names */
2700 for (i = 0; i < MAXQUOTAS; i++) 2714 for (i = 0; i < MAXQUOTAS; i++)
2701 if (old_opts.s_qf_names[i] && 2715 kfree(old_opts.s_qf_names[i]);
2702 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2703 kfree(old_opts.s_qf_names[i]);
2704#endif 2716#endif
2705 if (enable_quota) 2717 if (enable_quota)
2706 dquot_resume(sb, -1); 2718 dquot_resume(sb, -1);
@@ -2714,9 +2726,7 @@ restore_opts:
2714#ifdef CONFIG_QUOTA 2726#ifdef CONFIG_QUOTA
2715 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2727 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2716 for (i = 0; i < MAXQUOTAS; i++) { 2728 for (i = 0; i < MAXQUOTAS; i++) {
2717 if (sbi->s_qf_names[i] && 2729 kfree(sbi->s_qf_names[i]);
2718 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2719 kfree(sbi->s_qf_names[i]);
2720 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2730 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2721 } 2731 }
2722#endif 2732#endif
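The ext3_remount() hunks above stop aliasing old_opts.s_qf_names[] to sbi's pointers and instead take kstrdup() copies, which is what makes the later unconditional kfree() on either array safe. A small sketch of that snapshot idea; snapshot_names() and NR_NAMES are illustrative names under the assumption of a fixed-size name array:

#include <linux/slab.h>
#include <linux/string.h>

#define NR_NAMES 2

/* Take private copies so old and new arrays can be freed independently. */
static int snapshot_names(char *dst[NR_NAMES], char *const src[NR_NAMES])
{
	int i, j;

	for (i = 0; i < NR_NAMES; i++) {
		dst[i] = NULL;
		if (!src[i])
			continue;
		dst[i] = kstrdup(src[i], GFP_KERNEL);
		if (!dst[i]) {
			for (j = 0; j < i; j++)
				kfree(dst[j]);
			return -ENOMEM;
		}
	}
	return 0;
}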
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d22ebb7a4f55..b1fc96383e08 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -813,10 +813,10 @@ inserted:
 			ea_idebug(inode, "creating block %d", block);
 
 			new_bh = sb_getblk(sb, block);
-			if (!new_bh) {
+			if (unlikely(!new_bh)) {
 getblk_failed:
 				ext3_free_blocks(handle, inode, block, 1);
-				error = -EIO;
+				error = -ENOMEM;
 				goto cleanup;
 			}
 			lock_buffer(new_bh);
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 0a475c881852..987358740cb9 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23
 
 config EXT4_FS_POSIX_ACL
 	bool "Ext4 POSIX Access Control Lists"
+	depends on EXT4_FS
 	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL
 
 config EXT4_FS_SECURITY
 	bool "Ext4 Security Labels"
+	depends on EXT4_FS
 	help
 	  Security labels support alternative access control models
 	  implemented by security modules like SELinux.  This option
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e6e0d988439b..39a54a0e9fe4 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -324,8 +324,8 @@ ext4_acl_chmod(struct inode *inode)
 	if (error)
 		return error;
 retry:
-	handle = ext4_journal_start(inode,
-				    EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
+				    ext4_jbd2_credits_xattr(inode));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 		ext4_std_error(inode->i_sb, error);
@@ -422,7 +422,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
+				    ext4_jbd2_credits_xattr(inode));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 		goto release_and_out;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index cf1821784a16..2f2e0da1a6b7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -358,7 +358,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
 }
 
 /**
- * ext4_read_block_bitmap()
+ * ext4_read_block_bitmap_nowait()
  * @sb:			super block
  * @block_group:	given block group
  *
@@ -457,6 +457,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	struct buffer_head *bh;
 
 	bh = ext4_read_block_bitmap_nowait(sb, block_group);
+	if (!bh)
+		return NULL;
 	if (ext4_wait_block_bitmap(sb, block_group, bh)) {
 		put_bh(bh);
 		return NULL;
@@ -482,11 +484,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
 
 	free_clusters  = percpu_counter_read_positive(fcc);
 	dirty_clusters = percpu_counter_read_positive(dcc);
-	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
+
+	/*
+	 * r_blocks_count should always be multiple of the cluster ratio so
+	 * we are safe to do a plane bit shift only.
+	 */
+	root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
 
 	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
 				EXT4_FREECLUSTERS_WATERMARK) {
-		free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+		free_clusters  = percpu_counter_sum_positive(fcc);
 		dirty_clusters = percpu_counter_sum_positive(dcc);
 	}
 	/* Check whether we have space after accounting for current
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 80a28b297279..6dda04f05ef4 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -133,7 +133,7 @@ static int ext4_readdir(struct file *filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+		ext4_clear_inode_flag(file_inode(filp),
 				      EXT4_INODE_INDEX);
 	}
 	stored = 0;
@@ -185,6 +185,7 @@ static int ext4_readdir(struct file *filp,
 					 "at offset %llu",
 					 (unsigned long long)filp->f_pos);
 			filp->f_pos += sb->s_blocksize - offset;
+			brelse(bh);
 			continue;
 		}
 		set_buffer_verified(bh);
@@ -494,7 +495,7 @@ static int call_filldir(struct file *filp, void *dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb;
 	int error;
 
@@ -526,7 +527,7 @@ static int ext4_dx_readdir(struct file *filp,
 			 void *dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int ret;
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8462eb3c33aa..6e16c1867959 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -194,8 +194,7 @@ struct mpage_da_data {
  */
 #define EXT4_IO_END_UNWRITTEN	0x0001
 #define EXT4_IO_END_ERROR	0x0002
-#define EXT4_IO_END_QUEUED	0x0004
-#define EXT4_IO_END_DIRECT	0x0008
+#define EXT4_IO_END_DIRECT	0x0004
 
 struct ext4_io_page {
 	struct page	*p_page;
@@ -215,10 +214,8 @@ typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
 	struct inode		*inode;		/* file being written to */
 	unsigned int		flag;		/* unwritten or not */
-	struct page		*page;		/* for writepage() path */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
-	struct work_struct	work;		/* data work queue */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
 	int			num_io_pages;	/* for writepages() */
@@ -582,6 +579,8 @@ enum {
 #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 	/* Do not take i_data_sem locking in ext4_map_blocks */
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
+	/* Do not put hole in extent cache */
+#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
 
 /*
  * Flags used by ext4_free_blocks
@@ -810,17 +809,6 @@ do { \
 
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
-/*
- * storage for cached extent
- * If ec_len == 0, then the cache is invalid.
- * If ec_start == 0, then the cache represents a gap (null mapping)
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-};
-
 #include "extents_status.h"
 
 /*
@@ -887,7 +875,6 @@ struct ext4_inode_info {
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
-	struct ext4_ext_cache i_cached_extent;
 	/*
 	 * File creation time. Its function is same as that of
 	 * struct timespec i_{a,c,m}time in the generic inode.
@@ -901,6 +888,8 @@ struct ext4_inode_info {
 	/* extents status tree */
 	struct ext4_es_tree i_es_tree;
 	rwlock_t i_es_lock;
+	struct list_head i_es_lru;
+	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
 
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
@@ -930,6 +919,7 @@ struct ext4_inode_info {
 	spinlock_t i_completed_io_lock;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
+	struct work_struct i_unwritten_work;	/* deferred extent conversion */
 
 	spinlock_t i_block_reservation_lock;
 
@@ -985,7 +975,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
@@ -1316,6 +1305,11 @@ struct ext4_sb_info {
 
 	/* Precomputed FS UUID checksum for seeding other checksums */
 	__u32 s_csum_seed;
+
+	/* Reclaim extents from extent status tree */
+	struct shrinker s_es_shrinker;
+	struct list_head s_es_lru;
+	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2007,9 +2001,20 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 			  dx_hash_info *hinfo);
 
 /* ialloc.c */
-extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
+extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
 				    const struct qstr *qstr, __u32 goal,
-				    uid_t *owner);
+				    uid_t *owner, int handle_type,
+				    unsigned int line_no, int nblocks);
+
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
+	__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
+			 0, 0, 0)
+#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
+				    type, nblocks)		   \
+	__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
+			 (type), __LINE__, (nblocks))
+
+
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -2103,6 +2108,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
+extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2151,6 +2157,8 @@ extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
 extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
+extern const char *ext4_decode_error(struct super_block *sb, int errno,
+				     char nbuf[16]);
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
@@ -2227,6 +2235,8 @@ extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
 				       struct ext4_group_desc *gdp);
 extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
 				     struct ext4_group_desc *gdp);
+extern int ext4_register_li_request(struct super_block *sb,
+				    ext4_group_t first_not_zeroed);
 
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
@@ -2454,6 +2464,75 @@ extern const struct file_operations ext4_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
 extern void ext4_unwritten_wait(struct inode *inode);
 
+/* inline.c */
+extern int ext4_has_inline_data(struct inode *inode);
+extern int ext4_get_inline_size(struct inode *inode);
+extern int ext4_get_max_inline_size(struct inode *inode);
+extern int ext4_find_inline_data_nolock(struct inode *inode);
+extern void ext4_write_inline_data(struct inode *inode,
+				   struct ext4_iloc *iloc,
+				   void *buffer, loff_t pos,
+				   unsigned int len);
+extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
+				    unsigned int len);
+extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
+				 unsigned int len);
+extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
+
+extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+extern int ext4_try_to_write_inline_data(struct address_space *mapping,
+					 struct inode *inode,
+					 loff_t pos, unsigned len,
+					 unsigned flags,
+					 struct page **pagep);
+extern int ext4_write_inline_data_end(struct inode *inode,
+				      loff_t pos, unsigned len,
+				      unsigned copied,
+				      struct page *page);
+extern struct buffer_head *
+ext4_journalled_write_inline_data(struct inode *inode,
+				  unsigned len,
+				  struct page *page);
+extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
+					   struct inode *inode,
+					   loff_t pos, unsigned len,
+					   unsigned flags,
+					   struct page **pagep,
+					   void **fsdata);
+extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
+					  unsigned len, unsigned copied,
+					  struct page *page);
+extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
+				     struct inode *inode);
+extern int ext4_try_create_inline_dir(handle_t *handle,
+				      struct inode *parent,
+				      struct inode *inode);
+extern int ext4_read_inline_dir(struct file *filp,
+				void *dirent, filldir_t filldir,
+				int *has_inline_data);
+extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+					const struct qstr *d_name,
+					struct ext4_dir_entry_2 **res_dir,
+					int *has_inline_data);
+extern int ext4_delete_inline_entry(handle_t *handle,
+				    struct inode *dir,
+				    struct ext4_dir_entry_2 *de_del,
+				    struct buffer_head *bh,
+				    int *has_inline_data);
+extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
+extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+					struct ext4_dir_entry_2 **parent_de,
+					int *retval);
+extern int ext4_inline_data_fiemap(struct inode *inode,
+				   struct fiemap_extent_info *fieinfo,
+				   int *has_inline);
+extern int ext4_try_to_evict_inline_data(handle_t *handle,
+					 struct inode *inode,
+					 int needed);
+extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
+
+extern int ext4_convert_inline_data(struct inode *inode);
+
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
@@ -2520,6 +2599,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 						  struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end);
 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       __u64 start, __u64 len);
@@ -2537,6 +2619,7 @@ extern void ext4_exit_pageio(void);
 extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 487fda12bc00..8643ff5bbeb7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -193,12 +193,6 @@ static inline unsigned short ext_depth(struct inode *inode)
193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); 193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
194} 194}
195 195
196static inline void
197ext4_ext_invalidate_cache(struct inode *inode)
198{
199 EXT4_I(inode)->i_cached_extent.ec_len = 0;
200}
201
202static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) 196static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
203{ 197{
204 /* We can not have an uninitialized extent of zero length! */ 198 /* We can not have an uninitialized extent of zero length! */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b4323ba846b5..7058975e3a55 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,6 +6,108 @@
6 6
7#include <trace/events/ext4.h> 7#include <trace/events/ext4.h>
8 8
9/* Just increment the non-pointer handle value */
10static handle_t *ext4_get_nojournal(void)
11{
12 handle_t *handle = current->journal_info;
13 unsigned long ref_cnt = (unsigned long)handle;
14
15 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
16
17 ref_cnt++;
18 handle = (handle_t *)ref_cnt;
19
20 current->journal_info = handle;
21 return handle;
22}
23
24
25/* Decrement the non-pointer handle value */
26static void ext4_put_nojournal(handle_t *handle)
27{
28 unsigned long ref_cnt = (unsigned long)handle;
29
30 BUG_ON(ref_cnt == 0);
31
32 ref_cnt--;
33 handle = (handle_t *)ref_cnt;
34
35 current->journal_info = handle;
36}
37
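ext4_get_nojournal()/ext4_put_nojournal() above never allocate anything: when the filesystem runs without a journal, the nesting depth of "handles" is encoded directly in the handle_t pointer kept in current->journal_info. A standalone C sketch of that pointer-as-counter trick (the names, the global slot and the 4096 cap are illustrative, not the kernel API):

#include <assert.h>
#include <stdio.h>

typedef struct handle handle_t;     /* opaque; only the pointer value is used */

static handle_t *journal_info;      /* stands in for current->journal_info */
#define NOJOURNAL_MAX_REF_COUNT 4096UL

static handle_t *get_nojournal(void)
{
    unsigned long ref_cnt = (unsigned long)journal_info;

    assert(ref_cnt < NOJOURNAL_MAX_REF_COUNT);
    journal_info = (handle_t *)(ref_cnt + 1);   /* the "handle" is just a count */
    return journal_info;
}

static void put_nojournal(handle_t *handle)
{
    unsigned long ref_cnt = (unsigned long)handle;

    assert(ref_cnt != 0);
    journal_info = (handle_t *)(ref_cnt - 1);
}

int main(void)
{
    handle_t *outer = get_nojournal();  /* depth 1 */
    handle_t *inner = get_nojournal();  /* depth 2: nested "transaction" */

    put_nojournal(inner);
    put_nojournal(outer);
    printf("final depth: %lu\n", (unsigned long)journal_info);  /* 0 */
    return 0;
}
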
38/*
39 * Wrappers for jbd2_journal_start/end.
40 */
41handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
42 int type, int nblocks)
43{
44 journal_t *journal;
45
46 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
47 if (sb->s_flags & MS_RDONLY)
48 return ERR_PTR(-EROFS);
49
50 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
51 journal = EXT4_SB(sb)->s_journal;
52 if (!journal)
53 return ext4_get_nojournal();
54 /*
55 * Special case here: if the journal has aborted behind our
56 * backs (eg. EIO in the commit thread), then we still need to
57 * take the FS itself readonly cleanly.
58 */
59 if (is_journal_aborted(journal)) {
60 ext4_abort(sb, "Detected aborted journal");
61 return ERR_PTR(-EROFS);
62 }
63 return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
64}
65
66int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
67{
68 struct super_block *sb;
69 int err;
70 int rc;
71
72 if (!ext4_handle_valid(handle)) {
73 ext4_put_nojournal(handle);
74 return 0;
75 }
76 sb = handle->h_transaction->t_journal->j_private;
77 err = handle->h_err;
78 rc = jbd2_journal_stop(handle);
79
80 if (!err)
81 err = rc;
82 if (err)
83 __ext4_std_error(sb, where, line, err);
84 return err;
85}
86
87void ext4_journal_abort_handle(const char *caller, unsigned int line,
88 const char *err_fn, struct buffer_head *bh,
89 handle_t *handle, int err)
90{
91 char nbuf[16];
92 const char *errstr = ext4_decode_error(NULL, err, nbuf);
93
94 BUG_ON(!ext4_handle_valid(handle));
95
96 if (bh)
97 BUFFER_TRACE(bh, "abort");
98
99 if (!handle->h_err)
100 handle->h_err = err;
101
102 if (is_handle_aborted(handle))
103 return;
104
105 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
106 caller, line, errstr, err_fn);
107
108 jbd2_journal_abort_handle(handle);
109}
110
9int __ext4_journal_get_write_access(const char *where, unsigned int line, 111int __ext4_journal_get_write_access(const char *where, unsigned int line,
10 handle_t *handle, struct buffer_head *bh) 112 handle_t *handle, struct buffer_head *bh)
11{ 113{
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 7177f9b21cb2..4c216b1bf20c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -59,12 +59,6 @@
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
64 * generous. We can grow the delete transaction later if necessary. */
65
66#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
67
68/* Define an arbitrary limit for the amount of data we will anticipate 62/* Define an arbitrary limit for the amount of data we will anticipate
69 * writing to any given transaction. For unbounded transactions such as 63 * writing to any given transaction. For unbounded transactions such as
70 * write(2) and truncate(2) we can write more than this, but we always 64 * write(2) and truncate(2) we can write more than this, but we always
@@ -110,6 +104,36 @@
110#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
111#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
112 106
107static inline int ext4_jbd2_credits_xattr(struct inode *inode)
108{
109 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
110
111 /*
112 * In case of inline data, we may push out the data to a block,
113 * so we need to reserve credits for this eventuality
114 */
115 if (ext4_has_inline_data(inode))
116 credits += ext4_writepage_trans_blocks(inode) + 1;
117 return credits;
118}
119
120
121/*
122 * Ext4 handle operation types -- for logging purposes
123 */
124#define EXT4_HT_MISC 0
125#define EXT4_HT_INODE 1
126#define EXT4_HT_WRITE_PAGE 2
127#define EXT4_HT_MAP_BLOCKS 3
128#define EXT4_HT_DIR 4
129#define EXT4_HT_TRUNCATE 5
130#define EXT4_HT_QUOTA 6
131#define EXT4_HT_RESIZE 7
132#define EXT4_HT_MIGRATE 8
133#define EXT4_HT_MOVE_EXTENTS 9
134#define EXT4_HT_XATTR 10
135#define EXT4_HT_MAX 11
136
113/** 137/**
114 * struct ext4_journal_cb_entry - Base structure for callback information. 138 * struct ext4_journal_cb_entry - Base structure for callback information.
115 * 139 *
@@ -234,7 +258,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
234#define ext4_handle_dirty_super(handle, sb) \ 258#define ext4_handle_dirty_super(handle, sb) \
235 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) 259 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
236 260
237handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 261handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
262 int type, int nblocks);
238int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); 263int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
239 264
240#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) 265#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -268,9 +293,17 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
268 return 1; 293 return 1;
269} 294}
270 295
271static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 296#define ext4_journal_start_sb(sb, type, nblocks) \
297 __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
298
299#define ext4_journal_start(inode, type, nblocks) \
300 __ext4_journal_start((inode), __LINE__, (type), (nblocks))
301
302static inline handle_t *__ext4_journal_start(struct inode *inode,
303 unsigned int line, int type,
304 int nblocks)
272{ 305{
273 return ext4_journal_start_sb(inode->i_sb, nblocks); 306 return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
274} 307}
275 308
276#define ext4_journal_stop(handle) \ 309#define ext4_journal_stop(handle) \
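
The new __ext4_journal_start_sb()/__ext4_journal_start() pair threads the caller's line number and an EXT4_HT_* handle type through to tracing, and the ext4_journal_start*() macros capture __LINE__ at each call site. A standalone sketch of that wrapper pattern only (journal_start, HT_TRUNCATE and the printf stand in for the real jbd2 machinery):

#include <stdio.h>

#define HT_TRUNCATE 5   /* illustrative handle type, in the spirit of EXT4_HT_* */

/* The worker receives the call site and type so it can log or trace them. */
static void *__journal_start(const char *file, unsigned int line,
                             int type, int nblocks)
{
    printf("journal_start at %s:%u type=%d nblocks=%d\n",
           file, line, type, nblocks);
    return (void *)1;   /* dummy handle */
}

/* The macro, not the caller, supplies the location information. */
#define journal_start(type, nblocks) \
    __journal_start(__FILE__, __LINE__, (type), (nblocks))

int main(void)
{
    void *handle = journal_start(HT_TRUNCATE, 8);

    (void)handle;
    return 0;
}
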
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5ae1674ec12f..28dd8eeea6a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -112,7 +112,7 @@ static int ext4_split_extent_at(handle_t *handle,
112 int flags); 112 int flags);
113 113
114static int ext4_find_delayed_extent(struct inode *inode, 114static int ext4_find_delayed_extent(struct inode *inode,
115 struct ext4_ext_cache *newex); 115 struct extent_status *newes);
116 116
117static int ext4_ext_truncate_extend_restart(handle_t *handle, 117static int ext4_ext_truncate_extend_restart(handle_t *handle,
118 struct inode *inode, 118 struct inode *inode,
@@ -714,7 +714,6 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
714 eh->eh_magic = EXT4_EXT_MAGIC; 714 eh->eh_magic = EXT4_EXT_MAGIC;
715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); 715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
716 ext4_mark_inode_dirty(handle, inode); 716 ext4_mark_inode_dirty(handle, inode);
717 ext4_ext_invalidate_cache(inode);
718 return 0; 717 return 0;
719} 718}
720 719
@@ -725,6 +724,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
725 struct ext4_extent_header *eh; 724 struct ext4_extent_header *eh;
726 struct buffer_head *bh; 725 struct buffer_head *bh;
727 short int depth, i, ppos = 0, alloc = 0; 726 short int depth, i, ppos = 0, alloc = 0;
727 int ret;
728 728
729 eh = ext_inode_hdr(inode); 729 eh = ext_inode_hdr(inode);
730 depth = ext_depth(inode); 730 depth = ext_depth(inode);
@@ -752,12 +752,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
752 path[ppos].p_ext = NULL; 752 path[ppos].p_ext = NULL;
753 753
754 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 754 bh = sb_getblk(inode->i_sb, path[ppos].p_block);
755 if (unlikely(!bh)) 755 if (unlikely(!bh)) {
756 ret = -ENOMEM;
756 goto err; 757 goto err;
758 }
757 if (!bh_uptodate_or_lock(bh)) { 759 if (!bh_uptodate_or_lock(bh)) {
758 trace_ext4_ext_load_extent(inode, block, 760 trace_ext4_ext_load_extent(inode, block,
759 path[ppos].p_block); 761 path[ppos].p_block);
760 if (bh_submit_read(bh) < 0) { 762 ret = bh_submit_read(bh);
763 if (ret < 0) {
761 put_bh(bh); 764 put_bh(bh);
762 goto err; 765 goto err;
763 } 766 }
@@ -768,13 +771,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
768 put_bh(bh); 771 put_bh(bh);
769 EXT4_ERROR_INODE(inode, 772 EXT4_ERROR_INODE(inode,
770 "ppos %d > depth %d", ppos, depth); 773 "ppos %d > depth %d", ppos, depth);
774 ret = -EIO;
771 goto err; 775 goto err;
772 } 776 }
773 path[ppos].p_bh = bh; 777 path[ppos].p_bh = bh;
774 path[ppos].p_hdr = eh; 778 path[ppos].p_hdr = eh;
775 i--; 779 i--;
776 780
777 if (ext4_ext_check_block(inode, eh, i, bh)) 781 ret = ext4_ext_check_block(inode, eh, i, bh);
782 if (ret < 0)
778 goto err; 783 goto err;
779 } 784 }
780 785
@@ -796,7 +801,7 @@ err:
796 ext4_ext_drop_refs(path); 801 ext4_ext_drop_refs(path);
797 if (alloc) 802 if (alloc)
798 kfree(path); 803 kfree(path);
799 return ERR_PTR(-EIO); 804 return ERR_PTR(ret);
800} 805}
801 806
802/* 807/*
@@ -950,8 +955,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
950 goto cleanup; 955 goto cleanup;
951 } 956 }
952 bh = sb_getblk(inode->i_sb, newblock); 957 bh = sb_getblk(inode->i_sb, newblock);
953 if (!bh) { 958 if (unlikely(!bh)) {
954 err = -EIO; 959 err = -ENOMEM;
955 goto cleanup; 960 goto cleanup;
956 } 961 }
957 lock_buffer(bh); 962 lock_buffer(bh);
@@ -1023,8 +1028,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1023 oldblock = newblock; 1028 oldblock = newblock;
1024 newblock = ablocks[--a]; 1029 newblock = ablocks[--a];
1025 bh = sb_getblk(inode->i_sb, newblock); 1030 bh = sb_getblk(inode->i_sb, newblock);
1026 if (!bh) { 1031 if (unlikely(!bh)) {
1027 err = -EIO; 1032 err = -ENOMEM;
1028 goto cleanup; 1033 goto cleanup;
1029 } 1034 }
1030 lock_buffer(bh); 1035 lock_buffer(bh);
@@ -1136,11 +1141,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1136 return err; 1141 return err;
1137 1142
1138 bh = sb_getblk(inode->i_sb, newblock); 1143 bh = sb_getblk(inode->i_sb, newblock);
1139 if (!bh) { 1144 if (unlikely(!bh))
1140 err = -EIO; 1145 return -ENOMEM;
1141 ext4_std_error(inode->i_sb, err);
1142 return err;
1143 }
1144 lock_buffer(bh); 1146 lock_buffer(bh);
1145 1147
1146 err = ext4_journal_get_create_access(handle, bh); 1148 err = ext4_journal_get_create_access(handle, bh);
@@ -1960,7 +1962,6 @@ cleanup:
1960 ext4_ext_drop_refs(npath); 1962 ext4_ext_drop_refs(npath);
1961 kfree(npath); 1963 kfree(npath);
1962 } 1964 }
1963 ext4_ext_invalidate_cache(inode);
1964 return err; 1965 return err;
1965} 1966}
1966 1967
@@ -1969,8 +1970,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
1969 struct fiemap_extent_info *fieinfo) 1970 struct fiemap_extent_info *fieinfo)
1970{ 1971{
1971 struct ext4_ext_path *path = NULL; 1972 struct ext4_ext_path *path = NULL;
1972 struct ext4_ext_cache newex;
1973 struct ext4_extent *ex; 1973 struct ext4_extent *ex;
1974 struct extent_status es;
1974 ext4_lblk_t next, next_del, start = 0, end = 0; 1975 ext4_lblk_t next, next_del, start = 0, end = 0;
1975 ext4_lblk_t last = block + num; 1976 ext4_lblk_t last = block + num;
1976 int exists, depth = 0, err = 0; 1977 int exists, depth = 0, err = 0;
@@ -2044,37 +2045,47 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2044 BUG_ON(end <= start); 2045 BUG_ON(end <= start);
2045 2046
2046 if (!exists) { 2047 if (!exists) {
2047 newex.ec_block = start; 2048 es.es_lblk = start;
2048 newex.ec_len = end - start; 2049 es.es_len = end - start;
2049 newex.ec_start = 0; 2050 es.es_pblk = 0;
2050 } else { 2051 } else {
2051 newex.ec_block = le32_to_cpu(ex->ee_block); 2052 es.es_lblk = le32_to_cpu(ex->ee_block);
2052 newex.ec_len = ext4_ext_get_actual_len(ex); 2053 es.es_len = ext4_ext_get_actual_len(ex);
2053 newex.ec_start = ext4_ext_pblock(ex); 2054 es.es_pblk = ext4_ext_pblock(ex);
2054 if (ext4_ext_is_uninitialized(ex)) 2055 if (ext4_ext_is_uninitialized(ex))
2055 flags |= FIEMAP_EXTENT_UNWRITTEN; 2056 flags |= FIEMAP_EXTENT_UNWRITTEN;
2056 } 2057 }
2057 2058
2058 /* 2059 /*
2059 * Find delayed extent and update newex accordingly. We call 2060 * Find delayed extent and update es accordingly. We call
2060 * it even in !exists case to find out whether newex is the 2061 * it even in !exists case to find out whether es is the
2061 * last existing extent or not. 2062 * last existing extent or not.
2062 */ 2063 */
2063 next_del = ext4_find_delayed_extent(inode, &newex); 2064 next_del = ext4_find_delayed_extent(inode, &es);
2064 if (!exists && next_del) { 2065 if (!exists && next_del) {
2065 exists = 1; 2066 exists = 1;
2066 flags |= FIEMAP_EXTENT_DELALLOC; 2067 flags |= FIEMAP_EXTENT_DELALLOC;
2067 } 2068 }
2068 up_read(&EXT4_I(inode)->i_data_sem); 2069 up_read(&EXT4_I(inode)->i_data_sem);
2069 2070
2070 if (unlikely(newex.ec_len == 0)) { 2071 if (unlikely(es.es_len == 0)) {
2071 EXT4_ERROR_INODE(inode, "newex.ec_len == 0"); 2072 EXT4_ERROR_INODE(inode, "es.es_len == 0");
2072 err = -EIO; 2073 err = -EIO;
2073 break; 2074 break;
2074 } 2075 }
2075 2076
2076 /* This is possible iff next == next_del == EXT_MAX_BLOCKS */ 2077 /*
2077 if (next == next_del) { 2078 * This is possible iff next == next_del == EXT_MAX_BLOCKS.
 2079 * We need to check next == EXT_MAX_BLOCKS because it is
 2080 * possible for an extent to carry both unwritten and delayed
 2081 * status: when a delayed allocated extent is later allocated
 2082 * by fallocate, the status tree will track both of them in a
 2083 * single extent.
 2084 *
 2085 * So we could return an unwritten and delayed extent, and
 2086 * its block is equal to 'next'.
2087 */
2088 if (next == next_del && next == EXT_MAX_BLOCKS) {
2078 flags |= FIEMAP_EXTENT_LAST; 2089 flags |= FIEMAP_EXTENT_LAST;
2079 if (unlikely(next_del != EXT_MAX_BLOCKS || 2090 if (unlikely(next_del != EXT_MAX_BLOCKS ||
2080 next != EXT_MAX_BLOCKS)) { 2091 next != EXT_MAX_BLOCKS)) {
@@ -2089,9 +2100,9 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2089 2100
2090 if (exists) { 2101 if (exists) {
2091 err = fiemap_fill_next_extent(fieinfo, 2102 err = fiemap_fill_next_extent(fieinfo,
2092 (__u64)newex.ec_block << blksize_bits, 2103 (__u64)es.es_lblk << blksize_bits,
2093 (__u64)newex.ec_start << blksize_bits, 2104 (__u64)es.es_pblk << blksize_bits,
2094 (__u64)newex.ec_len << blksize_bits, 2105 (__u64)es.es_len << blksize_bits,
2095 flags); 2106 flags);
2096 if (err < 0) 2107 if (err < 0)
2097 break; 2108 break;
@@ -2101,7 +2112,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2101 } 2112 }
2102 } 2113 }
2103 2114
2104 block = newex.ec_block + newex.ec_len; 2115 block = es.es_lblk + es.es_len;
2105 } 2116 }
2106 2117
2107 if (path) { 2118 if (path) {
@@ -2112,21 +2123,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2112 return err; 2123 return err;
2113} 2124}
2114 2125
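fiemap reports byte ranges while the extent status entry above stores block numbers, hence the (__u64) casts before the shifts by blksize_bits when es.es_lblk, es.es_pblk and es.es_len are handed to fiemap_fill_next_extent(). A small standalone illustration of that conversion (block size and values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned int blksize_bits = 12;         /* 4096-byte blocks */
    uint32_t es_lblk = 100, es_len = 8;     /* logical start and length, in blocks */
    uint64_t es_pblk = 34000;               /* physical start, in blocks */

    /* Cast to 64 bits *before* shifting, or a large block number overflows. */
    printf("logical %llu physical %llu length %llu (bytes)\n",
           (unsigned long long)es_lblk << blksize_bits,
           (unsigned long long)es_pblk << blksize_bits,
           (unsigned long long)es_len << blksize_bits);
    return 0;
}
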
2115static void
2116ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
2117 __u32 len, ext4_fsblk_t start)
2118{
2119 struct ext4_ext_cache *cex;
2120 BUG_ON(len == 0);
2121 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2122 trace_ext4_ext_put_in_cache(inode, block, len, start);
2123 cex = &EXT4_I(inode)->i_cached_extent;
2124 cex->ec_block = block;
2125 cex->ec_len = len;
2126 cex->ec_start = start;
2127 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2128}
2129
2130/* 2126/*
2131 * ext4_ext_put_gap_in_cache: 2127 * ext4_ext_put_gap_in_cache:
2132 * calculate boundaries of the gap that the requested block fits into 2128 * calculate boundaries of the gap that the requested block fits into
@@ -2143,9 +2139,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2143 2139
2144 ex = path[depth].p_ext; 2140 ex = path[depth].p_ext;
2145 if (ex == NULL) { 2141 if (ex == NULL) {
2146 /* there is no extent yet, so gap is [0;-] */ 2142 /*
2147 lblock = 0; 2143 * there is no extent yet, so gap is [0;-] and we
2148 len = EXT_MAX_BLOCKS; 2144 * don't cache it
2145 */
2149 ext_debug("cache gap(whole file):"); 2146 ext_debug("cache gap(whole file):");
2150 } else if (block < le32_to_cpu(ex->ee_block)) { 2147 } else if (block < le32_to_cpu(ex->ee_block)) {
2151 lblock = block; 2148 lblock = block;
@@ -2154,6 +2151,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2154 block, 2151 block,
2155 le32_to_cpu(ex->ee_block), 2152 le32_to_cpu(ex->ee_block),
2156 ext4_ext_get_actual_len(ex)); 2153 ext4_ext_get_actual_len(ex));
2154 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2155 ext4_es_insert_extent(inode, lblock, len, ~0,
2156 EXTENT_STATUS_HOLE);
2157 } else if (block >= le32_to_cpu(ex->ee_block) 2157 } else if (block >= le32_to_cpu(ex->ee_block)
2158 + ext4_ext_get_actual_len(ex)) { 2158 + ext4_ext_get_actual_len(ex)) {
2159 ext4_lblk_t next; 2159 ext4_lblk_t next;
@@ -2167,58 +2167,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2167 block); 2167 block);
2168 BUG_ON(next == lblock); 2168 BUG_ON(next == lblock);
2169 len = next - lblock; 2169 len = next - lblock;
2170 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2171 ext4_es_insert_extent(inode, lblock, len, ~0,
2172 EXTENT_STATUS_HOLE);
2170 } else { 2173 } else {
2171 lblock = len = 0; 2174 lblock = len = 0;
2172 BUG(); 2175 BUG();
2173 } 2176 }
2174 2177
2175 ext_debug(" -> %u:%lu\n", lblock, len); 2178 ext_debug(" -> %u:%lu\n", lblock, len);
2176 ext4_ext_put_in_cache(inode, lblock, len, 0);
2177}
2178
2179/*
2180 * ext4_ext_in_cache()
2181 * Checks to see if the given block is in the cache.
2182 * If it is, the cached extent is stored in the given
2183 * cache extent pointer.
2184 *
2185 * @inode: The files inode
2186 * @block: The block to look for in the cache
2187 * @ex: Pointer where the cached extent will be stored
2188 * if it contains block
2189 *
2190 * Return 0 if cache is invalid; 1 if the cache is valid
2191 */
2192static int
2193ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2194 struct ext4_extent *ex)
2195{
2196 struct ext4_ext_cache *cex;
2197 int ret = 0;
2198
2199 /*
2200 * We borrow i_block_reservation_lock to protect i_cached_extent
2201 */
2202 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2203 cex = &EXT4_I(inode)->i_cached_extent;
2204
2205 /* has cache valid data? */
2206 if (cex->ec_len == 0)
2207 goto errout;
2208
2209 if (in_range(block, cex->ec_block, cex->ec_len)) {
2210 ex->ee_block = cpu_to_le32(cex->ec_block);
2211 ext4_ext_store_pblock(ex, cex->ec_start);
2212 ex->ee_len = cpu_to_le16(cex->ec_len);
2213 ext_debug("%u cached by %u:%u:%llu\n",
2214 block,
2215 cex->ec_block, cex->ec_len, cex->ec_start);
2216 ret = 1;
2217 }
2218errout:
2219 trace_ext4_ext_in_cache(inode, block, ret);
2220 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2221 return ret;
2222} 2179}
2223 2180
2224/* 2181/*
@@ -2653,13 +2610,11 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2653 ext_debug("truncate since %u to %u\n", start, end); 2610 ext_debug("truncate since %u to %u\n", start, end);
2654 2611
2655 /* probably first extent we're gonna free will be last in block */ 2612 /* probably first extent we're gonna free will be last in block */
2656 handle = ext4_journal_start(inode, depth + 1); 2613 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
2657 if (IS_ERR(handle)) 2614 if (IS_ERR(handle))
2658 return PTR_ERR(handle); 2615 return PTR_ERR(handle);
2659 2616
2660again: 2617again:
2661 ext4_ext_invalidate_cache(inode);
2662
2663 trace_ext4_ext_remove_space(inode, start, depth); 2618 trace_ext4_ext_remove_space(inode, start, depth);
2664 2619
2665 /* 2620 /*
@@ -3519,19 +3474,19 @@ out:
3519 * 3474 *
3520 * Return 1 if there is a delalloc block in the range, otherwise 0. 3475 * Return 1 if there is a delalloc block in the range, otherwise 0.
3521 */ 3476 */
3522static int ext4_find_delalloc_range(struct inode *inode, 3477int ext4_find_delalloc_range(struct inode *inode,
3523 ext4_lblk_t lblk_start, 3478 ext4_lblk_t lblk_start,
3524 ext4_lblk_t lblk_end) 3479 ext4_lblk_t lblk_end)
3525{ 3480{
3526 struct extent_status es; 3481 struct extent_status es;
3527 3482
3528 es.start = lblk_start; 3483 ext4_es_find_delayed_extent(inode, lblk_start, &es);
3529 ext4_es_find_extent(inode, &es); 3484 if (es.es_len == 0)
3530 if (es.len == 0)
3531 return 0; /* there is no delay extent in this tree */ 3485 return 0; /* there is no delay extent in this tree */
3532 else if (es.start <= lblk_start && lblk_start < es.start + es.len) 3486 else if (es.es_lblk <= lblk_start &&
3487 lblk_start < es.es_lblk + es.es_len)
3533 return 1; 3488 return 1;
3534 else if (lblk_start <= es.start && es.start <= lblk_end) 3489 else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
3535 return 1; 3490 return 1;
3536 else 3491 else
3537 return 0; 3492 return 0;
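
ext4_find_delalloc_range() now answers purely from the extent status tree: fetch the first delayed extent at or after lblk_start, then test whether it overlaps the queried range. The same overlap test, restated as a standalone sketch (the struct layout and names are illustrative):

#include <stdio.h>
#include <stdint.h>

/*
 * Return 1 if the first delayed extent found (es_lblk/es_len, es_len == 0
 * meaning "none") overlaps the queried range [start, end], otherwise 0.
 */
static int delalloc_in_range(uint32_t start, uint32_t end,
                             uint32_t es_lblk, uint32_t es_len)
{
    if (es_len == 0)
        return 0;                               /* no delayed extent at all */
    if (es_lblk <= start && start < es_lblk + es_len)
        return 1;                               /* extent covers the start */
    if (start <= es_lblk && es_lblk <= end)
        return 1;                               /* extent begins inside the range */
    return 0;
}

int main(void)
{
    printf("%d\n", delalloc_in_range(10, 20, 15, 4));  /* 1: [15,18] overlaps */
    printf("%d\n", delalloc_in_range(10, 20, 25, 4));  /* 0: [25,28] is past it */
    return 0;
}
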
@@ -3656,6 +3611,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3656 ext4_set_io_unwritten_flag(inode, io); 3611 ext4_set_io_unwritten_flag(inode, io);
3657 else 3612 else
3658 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3613 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3614 map->m_flags |= EXT4_MAP_UNWRITTEN;
3659 if (ext4_should_dioread_nolock(inode)) 3615 if (ext4_should_dioread_nolock(inode))
3660 map->m_flags |= EXT4_MAP_UNINIT; 3616 map->m_flags |= EXT4_MAP_UNINIT;
3661 goto out; 3617 goto out;
@@ -3677,8 +3633,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3677 * repeat fallocate creation request 3633 * repeat fallocate creation request
3678 * we already have an unwritten extent 3634 * we already have an unwritten extent
3679 */ 3635 */
3680 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3636 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) {
3637 map->m_flags |= EXT4_MAP_UNWRITTEN;
3681 goto map_out; 3638 goto map_out;
3639 }
3682 3640
3683 /* buffered READ or buffered write_begin() lookup */ 3641 /* buffered READ or buffered write_begin() lookup */
3684 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3642 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3898,35 +3856,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3898 map->m_lblk, map->m_len, inode->i_ino); 3856 map->m_lblk, map->m_len, inode->i_ino);
3899 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3857 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3900 3858
3901 /* check in cache */
3902 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3903 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3904 if ((sbi->s_cluster_ratio > 1) &&
3905 ext4_find_delalloc_cluster(inode, map->m_lblk))
3906 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3907
3908 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3909 /*
3910 * block isn't allocated yet and
3911 * user doesn't want to allocate it
3912 */
3913 goto out2;
3914 }
3915 /* we should allocate requested block */
3916 } else {
3917 /* block is already allocated */
3918 if (sbi->s_cluster_ratio > 1)
3919 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3920 newblock = map->m_lblk
3921 - le32_to_cpu(newex.ee_block)
3922 + ext4_ext_pblock(&newex);
3923 /* number of remaining blocks in the extent */
3924 allocated = ext4_ext_get_actual_len(&newex) -
3925 (map->m_lblk - le32_to_cpu(newex.ee_block));
3926 goto out;
3927 }
3928 }
3929
3930 /* find extent for this block */ 3859 /* find extent for this block */
3931 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3860 path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
3932 if (IS_ERR(path)) { 3861 if (IS_ERR(path)) {
@@ -3973,15 +3902,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3973 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3902 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
3974 ee_block, ee_len, newblock); 3903 ee_block, ee_len, newblock);
3975 3904
3976 /* 3905 if (!ext4_ext_is_uninitialized(ex))
3977 * Do not put uninitialized extent
3978 * in the cache
3979 */
3980 if (!ext4_ext_is_uninitialized(ex)) {
3981 ext4_ext_put_in_cache(inode, ee_block,
3982 ee_len, ee_start);
3983 goto out; 3906 goto out;
3984 } 3907
3985 allocated = ext4_ext_handle_uninitialized_extents( 3908 allocated = ext4_ext_handle_uninitialized_extents(
3986 handle, inode, map, path, flags, 3909 handle, inode, map, path, flags,
3987 allocated, newblock); 3910 allocated, newblock);
@@ -4002,7 +3925,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4002 * put just found gap into cache to speed up 3925 * put just found gap into cache to speed up
4003 * subsequent requests 3926 * subsequent requests
4004 */ 3927 */
4005 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3928 if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
3929 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
4006 goto out2; 3930 goto out2;
4007 } 3931 }
4008 3932
@@ -4108,6 +4032,7 @@ got_allocated_blocks:
4108 /* Mark uninitialized */ 4032 /* Mark uninitialized */
4109 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4033 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
4110 ext4_ext_mark_uninitialized(&newex); 4034 ext4_ext_mark_uninitialized(&newex);
4035 map->m_flags |= EXT4_MAP_UNWRITTEN;
4111 /* 4036 /*
4112 * io_end structure was created for every IO write to an 4037 * io_end structure was created for every IO write to an
4113 * uninitialized extent. To avoid unnecessary conversion, 4038 * uninitialized extent. To avoid unnecessary conversion,
@@ -4241,10 +4166,9 @@ got_allocated_blocks:
4241 * Cache the extent and update transaction to commit on fdatasync only 4166 * Cache the extent and update transaction to commit on fdatasync only
4242 * when it is _not_ an uninitialized extent. 4167 * when it is _not_ an uninitialized extent.
4243 */ 4168 */
4244 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 4169 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
4245 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
4246 ext4_update_inode_fsync_trans(handle, inode, 1); 4170 ext4_update_inode_fsync_trans(handle, inode, 1);
4247 } else 4171 else
4248 ext4_update_inode_fsync_trans(handle, inode, 0); 4172 ext4_update_inode_fsync_trans(handle, inode, 0);
4249out: 4173out:
4250 if (allocated > map->m_len) 4174 if (allocated > map->m_len)
@@ -4284,7 +4208,7 @@ void ext4_ext_truncate(struct inode *inode)
4284 * probably first extent we're gonna free will be last in block 4208 * probably first extent we're gonna free will be last in block
4285 */ 4209 */
4286 err = ext4_writepage_trans_blocks(inode); 4210 err = ext4_writepage_trans_blocks(inode);
4287 handle = ext4_journal_start(inode, err); 4211 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
4288 if (IS_ERR(handle)) 4212 if (IS_ERR(handle))
4289 return; 4213 return;
4290 4214
@@ -4303,7 +4227,6 @@ void ext4_ext_truncate(struct inode *inode)
4303 goto out_stop; 4227 goto out_stop;
4304 4228
4305 down_write(&EXT4_I(inode)->i_data_sem); 4229 down_write(&EXT4_I(inode)->i_data_sem);
4306 ext4_ext_invalidate_cache(inode);
4307 4230
4308 ext4_discard_preallocations(inode); 4231 ext4_discard_preallocations(inode);
4309 4232
@@ -4386,7 +4309,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
4386 */ 4309 */
4387long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4310long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4388{ 4311{
4389 struct inode *inode = file->f_path.dentry->d_inode; 4312 struct inode *inode = file_inode(file);
4390 handle_t *handle; 4313 handle_t *handle;
4391 loff_t new_size; 4314 loff_t new_size;
4392 unsigned int max_blocks; 4315 unsigned int max_blocks;
@@ -4397,13 +4320,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4397 struct ext4_map_blocks map; 4320 struct ext4_map_blocks map;
4398 unsigned int credits, blkbits = inode->i_blkbits; 4321 unsigned int credits, blkbits = inode->i_blkbits;
4399 4322
4400 /*
4401 * currently supporting (pre)allocate mode for extent-based
4402 * files _only_
4403 */
4404 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4405 return -EOPNOTSUPP;
4406
4407 /* Return error if mode is not supported */ 4323 /* Return error if mode is not supported */
4408 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4324 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
4409 return -EOPNOTSUPP; 4325 return -EOPNOTSUPP;
@@ -4415,6 +4331,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4415 if (ret) 4331 if (ret)
4416 return ret; 4332 return ret;
4417 4333
4334 /*
4335 * currently supporting (pre)allocate mode for extent-based
4336 * files _only_
4337 */
4338 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4339 return -EOPNOTSUPP;
4340
4418 trace_ext4_fallocate_enter(inode, offset, len, mode); 4341 trace_ext4_fallocate_enter(inode, offset, len, mode);
4419 map.m_lblk = offset >> blkbits; 4342 map.m_lblk = offset >> blkbits;
4420 /* 4343 /*
@@ -4451,7 +4374,8 @@ retry:
4451 while (ret >= 0 && ret < max_blocks) { 4374 while (ret >= 0 && ret < max_blocks) {
4452 map.m_lblk = map.m_lblk + ret; 4375 map.m_lblk = map.m_lblk + ret;
4453 map.m_len = max_blocks = max_blocks - ret; 4376 map.m_len = max_blocks = max_blocks - ret;
4454 handle = ext4_journal_start(inode, credits); 4377 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4378 credits);
4455 if (IS_ERR(handle)) { 4379 if (IS_ERR(handle)) {
4456 ret = PTR_ERR(handle); 4380 ret = PTR_ERR(handle);
4457 break; 4381 break;
@@ -4459,11 +4383,11 @@ retry:
4459 ret = ext4_map_blocks(handle, inode, &map, flags); 4383 ret = ext4_map_blocks(handle, inode, &map, flags);
4460 if (ret <= 0) { 4384 if (ret <= 0) {
4461#ifdef EXT4FS_DEBUG 4385#ifdef EXT4FS_DEBUG
4462 WARN_ON(ret <= 0); 4386 ext4_warning(inode->i_sb,
4463 printk(KERN_ERR "%s: ext4_ext_map_blocks " 4387 "inode #%lu: block %u: len %u: "
4464 "returned error inode#%lu, block=%u, " 4388 "ext4_ext_map_blocks returned %d",
4465 "max_blocks=%u", __func__, 4389 inode->i_ino, map.m_lblk,
4466 inode->i_ino, map.m_lblk, max_blocks); 4390 map.m_len, ret);
4467#endif 4391#endif
4468 ext4_mark_inode_dirty(handle, inode); 4392 ext4_mark_inode_dirty(handle, inode);
4469 ret2 = ext4_journal_stop(handle); 4393 ret2 = ext4_journal_stop(handle);
@@ -4529,21 +4453,19 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4529 while (ret >= 0 && ret < max_blocks) { 4453 while (ret >= 0 && ret < max_blocks) {
4530 map.m_lblk += ret; 4454 map.m_lblk += ret;
4531 map.m_len = (max_blocks -= ret); 4455 map.m_len = (max_blocks -= ret);
4532 handle = ext4_journal_start(inode, credits); 4456 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
4533 if (IS_ERR(handle)) { 4457 if (IS_ERR(handle)) {
4534 ret = PTR_ERR(handle); 4458 ret = PTR_ERR(handle);
4535 break; 4459 break;
4536 } 4460 }
4537 ret = ext4_map_blocks(handle, inode, &map, 4461 ret = ext4_map_blocks(handle, inode, &map,
4538 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4462 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4539 if (ret <= 0) { 4463 if (ret <= 0)
4540 WARN_ON(ret <= 0); 4464 ext4_warning(inode->i_sb,
4541 ext4_msg(inode->i_sb, KERN_ERR, 4465 "inode #%lu: block %u: len %u: "
4542 "%s:%d: inode #%lu: block %u: len %u: " 4466 "ext4_ext_map_blocks returned %d",
4543 "ext4_ext_map_blocks returned %d", 4467 inode->i_ino, map.m_lblk,
4544 __func__, __LINE__, inode->i_ino, map.m_lblk, 4468 map.m_len, ret);
4545 map.m_len, ret);
4546 }
4547 ext4_mark_inode_dirty(handle, inode); 4469 ext4_mark_inode_dirty(handle, inode);
4548 ret2 = ext4_journal_stop(handle); 4470 ret2 = ext4_journal_stop(handle);
4549 if (ret <= 0 || ret2 ) 4471 if (ret <= 0 || ret2 )
@@ -4553,42 +4475,48 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4553} 4475}
4554 4476
4555/* 4477/*
 4556 * If newex is not existing extent (newex->ec_start equals zero) find 4478 * If newes is not an existing extent (newes->es_pblk equals zero) find
4557 * delayed extent at start of newex and update newex accordingly and 4479 * delayed extent at start of newes and update newes accordingly and
4558 * return start of the next delayed extent. 4480 * return start of the next delayed extent.
4559 * 4481 *
 4560 * If newex is existing extent (newex->ec_start is not equal zero) 4482 * If newes is an existing extent (newes->es_pblk is not zero)
4561 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed 4483 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
4562 * extent found. Leave newex unmodified. 4484 * extent found. Leave newes unmodified.
4563 */ 4485 */
4564static int ext4_find_delayed_extent(struct inode *inode, 4486static int ext4_find_delayed_extent(struct inode *inode,
4565 struct ext4_ext_cache *newex) 4487 struct extent_status *newes)
4566{ 4488{
4567 struct extent_status es; 4489 struct extent_status es;
4568 ext4_lblk_t next_del; 4490 ext4_lblk_t block, next_del;
4569 4491
4570 es.start = newex->ec_block; 4492 ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4571 next_del = ext4_es_find_extent(inode, &es);
4572 4493
4573 if (newex->ec_start == 0) { 4494 if (newes->es_pblk == 0) {
4574 /* 4495 /*
4575 * No extent in extent-tree contains block @newex->ec_start, 4496 * No extent in extent-tree contains block @newes->es_pblk,
4576 * then the block may stay in 1)a hole or 2)delayed-extent. 4497 * then the block may stay in 1)a hole or 2)delayed-extent.
4577 */ 4498 */
4578 if (es.len == 0) 4499 if (es.es_len == 0)
4579 /* A hole found. */ 4500 /* A hole found. */
4580 return 0; 4501 return 0;
4581 4502
4582 if (es.start > newex->ec_block) { 4503 if (es.es_lblk > newes->es_lblk) {
4583 /* A hole found. */ 4504 /* A hole found. */
4584 newex->ec_len = min(es.start - newex->ec_block, 4505 newes->es_len = min(es.es_lblk - newes->es_lblk,
4585 newex->ec_len); 4506 newes->es_len);
4586 return 0; 4507 return 0;
4587 } 4508 }
4588 4509
4589 newex->ec_len = es.start + es.len - newex->ec_block; 4510 newes->es_len = es.es_lblk + es.es_len - newes->es_lblk;
4590 } 4511 }
4591 4512
4513 block = newes->es_lblk + newes->es_len;
4514 ext4_es_find_delayed_extent(inode, block, &es);
4515 if (es.es_len == 0)
4516 next_del = EXT_MAX_BLOCKS;
4517 else
4518 next_del = es.es_lblk;
4519
4592 return next_del; 4520 return next_del;
4593} 4521}
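
For the !exists case, ext4_find_delayed_extent() either reports a hole, shrinking the queried length down to the next delayed extent, or trims the length to the part that really is delayed. A userspace sketch of that decision (a found length of zero means "nothing delayed at or after the block"):

#include <stdio.h>
#include <stdint.h>

struct found { uint32_t lblk, len; };   /* first delayed extent at/after the query */

static const char *classify(uint32_t q_lblk, uint32_t *q_len, struct found es)
{
    if (es.len == 0)
        return "hole";                          /* nothing delayed afterwards */
    if (es.lblk > q_lblk) {
        if (es.lblk - q_lblk < *q_len)
            *q_len = es.lblk - q_lblk;          /* shrink to the hole in front */
        return "hole";
    }
    *q_len = es.lblk + es.len - q_lblk;         /* delayed portion of the range */
    return "delayed";
}

int main(void)
{
    uint32_t len = 10;
    struct found es = { 15, 4 };
    const char *kind = classify(10, &len, es);

    printf("%s, len=%u\n", kind, len);          /* hole, len=5 */
    return 0;
}
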
4594/* fiemap flags we can handle specified here */ 4522/* fiemap flags we can handle specified here */
@@ -4643,7 +4571,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
4643 */ 4571 */
4644int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) 4572int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4645{ 4573{
4646 struct inode *inode = file->f_path.dentry->d_inode; 4574 struct inode *inode = file_inode(file);
4647 struct super_block *sb = inode->i_sb; 4575 struct super_block *sb = inode->i_sb;
4648 ext4_lblk_t first_block, stop_block; 4576 ext4_lblk_t first_block, stop_block;
4649 struct address_space *mapping = inode->i_mapping; 4577 struct address_space *mapping = inode->i_mapping;
@@ -4709,7 +4637,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4709 inode_dio_wait(inode); 4637 inode_dio_wait(inode);
4710 4638
4711 credits = ext4_writepage_trans_blocks(inode); 4639 credits = ext4_writepage_trans_blocks(inode);
4712 handle = ext4_journal_start(inode, credits); 4640 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
4713 if (IS_ERR(handle)) { 4641 if (IS_ERR(handle)) {
4714 err = PTR_ERR(handle); 4642 err = PTR_ERR(handle);
4715 goto out_dio; 4643 goto out_dio;
@@ -4786,14 +4714,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4786 goto out; 4714 goto out;
4787 4715
4788 down_write(&EXT4_I(inode)->i_data_sem); 4716 down_write(&EXT4_I(inode)->i_data_sem);
4789 ext4_ext_invalidate_cache(inode);
4790 ext4_discard_preallocations(inode); 4717 ext4_discard_preallocations(inode);
4791 4718
4792 err = ext4_es_remove_extent(inode, first_block, 4719 err = ext4_es_remove_extent(inode, first_block,
4793 stop_block - first_block); 4720 stop_block - first_block);
4794 err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4721 err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
4795 4722
4796 ext4_ext_invalidate_cache(inode);
4797 ext4_discard_preallocations(inode); 4723 ext4_discard_preallocations(inode);
4798 4724
4799 if (IS_SYNC(inode)) 4725 if (IS_SYNC(inode))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 564d981a2fcc..f768f4a98a2b 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -23,40 +23,53 @@
23 * (e.g. Reservation space warning), and provide extent-level locking. 23 * (e.g. Reservation space warning), and provide extent-level locking.
24 * Delay extent tree is the first step to achieve this goal. It is 24 * Delay extent tree is the first step to achieve this goal. It is
 25 * originally built by Yongqiang Yang. At that time it was called delay 25 * originally built by Yongqiang Yang. At that time it was called delay
 26 * extent tree, whose goal is only track delay extent in memory to 26 * extent tree, whose goal is only to track delayed extents in memory to
27 * simplify the implementation of fiemap and bigalloc, and introduce 27 * simplify the implementation of fiemap and bigalloc, and introduce
28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called 28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called
 29 * delay extent tree at the following comment. But for better 29 * delay extent tree at the first commit. But to better describe
 30 * understand what it does, it has been rename to extent status tree. 30 * what it does, it has been renamed to extent status tree.
31 * 31 *
32 * Currently the first step has been done. All delay extents are 32 * Step1:
33 * tracked in the tree. It maintains the delay extent when a delay 33 * Currently the first step has been done. All delayed extents are
34 * allocation is issued, and the delay extent is written out or 34 * tracked in the tree. It maintains the delayed extent when a delayed
35 * allocation is issued, and the delayed extent is written out or
35 * invalidated. Therefore the implementation of fiemap and bigalloc 36 * invalidated. Therefore the implementation of fiemap and bigalloc
36 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced. 37 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
37 * 38 *
38 * The following comment describes the implemenmtation of extent 39 * The following comment describes the implemenmtation of extent
39 * status tree and future works. 40 * status tree and future works.
41 *
42 * Step2:
 43 * In this step all extent status is tracked by the extent status
 44 * tree. Thus, we can first try to look up a block mapping in this
 45 * tree before finding it in the extent tree, so the single extent
 46 * cache can be removed: the extent status tree does a better job.
 47 * Extents in the status tree are loaded on demand, so the tree may
 48 * not contain all of the extents in a file. Meanwhile we define a
 49 * shrinker to reclaim memory from the extent status tree, because a
 50 * fragmented extent tree would make the status tree cost too much
 51 * memory. Written/unwritten/hole extents in the tree are reclaimed
 52 * by this shrinker under high memory pressure. Delayed extents are
 53 * not reclaimed because fiemap, bigalloc, and seek_data/hole need them.
40 */ 54 */
41 55
42/* 56/*
43 * extents status tree implementation for ext4. 57 * Extent status tree implementation for ext4.
44 * 58 *
45 * 59 *
46 * ========================================================================== 60 * ==========================================================================
47 * Extents status encompass delayed extents and extent locks 61 * Extent status tree tracks all extent status.
48 * 62 *
 49 * 1. Why delayed extent implementation ? 63 * 1. Why do we need to implement an extent status tree?
50 * 64 *
51 * Without delayed extent, ext4 identifies a delayed extent by looking 65 * Without extent status tree, ext4 identifies a delayed extent by looking
52 * up page cache, this has several deficiencies - complicated, buggy, 66 * up page cache, this has several deficiencies - complicated, buggy,
53 * and inefficient code. 67 * and inefficient code.
54 * 68 *
55 * FIEMAP, SEEK_HOLE/DATA, bigalloc, punch hole and writeout all need 69 * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
 56 * to know if a block or a range of blocks belong to a delayed 70 * block or a range of blocks belong to a delayed extent.
57 * extent.
58 * 71 *
59 * Let us have a look at how they do without delayed extents implementation. 72 * Let us have a look at how they do without extent status tree.
60 * -- FIEMAP 73 * -- FIEMAP
61 * FIEMAP looks up page cache to identify delayed allocations from holes. 74 * FIEMAP looks up page cache to identify delayed allocations from holes.
62 * 75 *
@@ -68,47 +81,48 @@
68 * already under delayed allocation or not to determine whether 81 * already under delayed allocation or not to determine whether
69 * quota reserving is needed for the cluster. 82 * quota reserving is needed for the cluster.
70 * 83 *
71 * -- punch hole
72 * punch hole looks up page cache to identify a delayed extent.
73 *
74 * -- writeout 84 * -- writeout
75 * Writeout looks up whole page cache to see if a buffer is 85 * Writeout looks up whole page cache to see if a buffer is
 76 * mapped. If there are not very many delayed buffers, then it is 86 * mapped. If there are not very many delayed buffers, then it is
 77 * time consuming. 87 * time consuming.
78 * 88 *
79 * With delayed extents implementation, FIEMAP, SEEK_HOLE/DATA, 89 * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
80 * bigalloc and writeout can figure out if a block or a range of 90 * bigalloc and writeout can figure out if a block or a range of
81 * blocks is under delayed allocation(belonged to a delayed extent) or 91 * blocks is under delayed allocation(belonged to a delayed extent) or
82 * not by searching the delayed extent tree. 92 * not by searching the extent tree.
83 * 93 *
84 * 94 *
85 * ========================================================================== 95 * ==========================================================================
 86 * 2. ext4 delayed extents implementation 96 * 2. Ext4 extent status tree implementation
97 *
98 * -- extent
 99 * An extent is a range of blocks which are contiguous logically and
 100 * physically. Unlike an extent in the extent tree, this extent is an
 101 * in-memory struct; there is no corresponding on-disk data. There is
 102 * no limit on the length of an extent, so an extent can contain as
 103 * many blocks as are contiguous logically and physically.
87 * 104 *
88 * -- delayed extent 105 * -- extent status tree
89 * A delayed extent is a range of blocks which are contiguous 106 * Every inode has an extent status tree and all allocation blocks
 90 * logically and under delayed allocation. Unlike extent in 107 * are added to the tree with different status. The extents in the
91 * ext4, delayed extent in ext4 is a in-memory struct, there is 108 * tree are ordered by logical block no.
92 * no corresponding on-disk data. There is no limit on length of
93 * delayed extent, so a delayed extent can contain as many blocks
94 * as they are contiguous logically.
95 * 109 *
 96 * -- delayed extent tree 110 * -- operations on an extent status tree
 97 * Every inode has a delayed extent tree and all under delayed 111 * There are three important operations on an extent status tree: find
 98 * allocation blocks are added to the tree as delayed extents. 112 * next extent, adding an extent (a range of blocks) and removing an extent.
99 * Delayed extents in the tree are ordered by logical block no.
100 * 113 *
 101 * -- operations on a delayed extent tree 114 * -- race on an extent status tree
102 * There are three operations on a delayed extent tree: find next 115 * Extent status tree is protected by inode->i_es_lock.
103 * delayed extent, adding a space(a range of blocks) and removing
104 * a space.
105 * 116 *
106 * -- race on a delayed extent tree 117 * -- memory consumption
107 * Delayed extent tree is protected inode->i_es_lock. 118 * Fragmented extent tree will make extent status tree cost too much
119 * memory. Hence, we will reclaim written/unwritten/hole extents from
 120 * the tree under heavy memory pressure.
108 * 121 *
109 * 122 *
110 * ========================================================================== 123 * ==========================================================================
111 * 3. performance analysis 124 * 3. Performance analysis
125 *
112 * -- overhead 126 * -- overhead
113 * 1. There is a cache extent for write access, so if writes are 127 * 1. There is a cache extent for write access, so if writes are
114 * not very random, adding space operaions are in O(1) time. 128 * not very random, adding space operaions are in O(1) time.
@@ -120,15 +134,21 @@
120 * 134 *
121 * ========================================================================== 135 * ==========================================================================
122 * 4. TODO list 136 * 4. TODO list
123 * -- Track all extent status
124 * 137 *
125 * -- Improve get block process 138 * -- Refactor delayed space reservation
126 * 139 *
127 * -- Extent-level locking 140 * -- Extent-level locking
128 */ 141 */
129 142
130static struct kmem_cache *ext4_es_cachep; 143static struct kmem_cache *ext4_es_cachep;
131 144
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int ext4_es_reclaim_extents_count(struct super_block *sb);
151
132int __init ext4_init_es(void) 152int __init ext4_init_es(void)
133{ 153{
134 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); 154 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
@@ -161,7 +181,9 @@ static void ext4_es_print_tree(struct inode *inode)
161 while (node) { 181 while (node) {
162 struct extent_status *es; 182 struct extent_status *es;
163 es = rb_entry(node, struct extent_status, rb_node); 183 es = rb_entry(node, struct extent_status, rb_node);
164 printk(KERN_DEBUG " [%u/%u)", es->start, es->len); 184 printk(KERN_DEBUG " [%u/%u) %llu %llx",
185 es->es_lblk, es->es_len,
186 ext4_es_pblock(es), ext4_es_status(es));
165 node = rb_next(node); 187 node = rb_next(node);
166 } 188 }
167 printk(KERN_DEBUG "\n"); 189 printk(KERN_DEBUG "\n");
@@ -170,10 +192,10 @@ static void ext4_es_print_tree(struct inode *inode)
170#define ext4_es_print_tree(inode) 192#define ext4_es_print_tree(inode)
171#endif 193#endif
172 194
173static inline ext4_lblk_t extent_status_end(struct extent_status *es) 195static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
174{ 196{
175 BUG_ON(es->start + es->len < es->start); 197 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
176 return es->start + es->len - 1; 198 return es->es_lblk + es->es_len - 1;
177} 199}
178 200
179/* 201/*
@@ -181,25 +203,25 @@ static inline ext4_lblk_t extent_status_end(struct extent_status *es)
181 * it can't be found, try to find next extent. 203 * it can't be found, try to find next extent.
182 */ 204 */
183static struct extent_status *__es_tree_search(struct rb_root *root, 205static struct extent_status *__es_tree_search(struct rb_root *root,
184 ext4_lblk_t offset) 206 ext4_lblk_t lblk)
185{ 207{
186 struct rb_node *node = root->rb_node; 208 struct rb_node *node = root->rb_node;
187 struct extent_status *es = NULL; 209 struct extent_status *es = NULL;
188 210
189 while (node) { 211 while (node) {
190 es = rb_entry(node, struct extent_status, rb_node); 212 es = rb_entry(node, struct extent_status, rb_node);
191 if (offset < es->start) 213 if (lblk < es->es_lblk)
192 node = node->rb_left; 214 node = node->rb_left;
193 else if (offset > extent_status_end(es)) 215 else if (lblk > ext4_es_end(es))
194 node = node->rb_right; 216 node = node->rb_right;
195 else 217 else
196 return es; 218 return es;
197 } 219 }
198 220
199 if (es && offset < es->start) 221 if (es && lblk < es->es_lblk)
200 return es; 222 return es;
201 223
202 if (es && offset > extent_status_end(es)) { 224 if (es && lblk > ext4_es_end(es)) {
203 node = rb_next(&es->rb_node); 225 node = rb_next(&es->rb_node);
204 return node ? rb_entry(node, struct extent_status, rb_node) : 226 return node ? rb_entry(node, struct extent_status, rb_node) :
205 NULL; 227 NULL;
@@ -209,79 +231,121 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
209} 231}
210 232
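__es_tree_search() returns the extent that contains the queried block if there is one, otherwise the next extent after it, or NULL. The same lookup contract over a plain sorted array, as a standalone sketch (the kernel walks an rbtree; the data structure is not the point here):

#include <stdio.h>
#include <stdint.h>

struct ext { uint32_t lblk, len; };

/* Return the index of the extent covering lblk, else the first extent
 * after lblk, else -1 ("nothing at or after this block"). */
static int es_search(const struct ext *v, int n, uint32_t lblk)
{
    for (int i = 0; i < n; i++) {
        if (lblk < v[i].lblk)
            return i;                       /* next extent after lblk */
        if (lblk <= v[i].lblk + v[i].len - 1)
            return i;                       /* extent covering lblk */
    }
    return -1;
}

int main(void)
{
    struct ext tree[] = { { 10, 5 }, { 30, 2 } };

    printf("%d %d %d\n",
           es_search(tree, 2, 12),   /*  0: covered by [10,14] */
           es_search(tree, 2, 20),   /*  1: next extent starts at 30 */
           es_search(tree, 2, 40));  /* -1: nothing at or after 40 */
    return 0;
}
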
211/* 233/*
212 * ext4_es_find_extent: find the 1st delayed extent covering @es->start 234 * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
213 * if it exists, otherwise, the next extent after @es->start. 235 * if it exists, otherwise, the next extent after @es->lblk.
214 * 236 *
215 * @inode: the inode which owns delayed extents 237 * @inode: the inode which owns delayed extents
238 * @lblk: the offset where we start to search
216 * @es: delayed extent that we found 239 * @es: delayed extent that we found
217 *
218 * Returns the first block of the next extent after es, otherwise
219 * EXT_MAX_BLOCKS if no delay extent is found.
220 * Delayed extent is returned via @es.
221 */ 240 */
222ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) 241void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
242 struct extent_status *es)
223{ 243{
224 struct ext4_es_tree *tree = NULL; 244 struct ext4_es_tree *tree = NULL;
225 struct extent_status *es1 = NULL; 245 struct extent_status *es1 = NULL;
226 struct rb_node *node; 246 struct rb_node *node;
227 ext4_lblk_t ret = EXT_MAX_BLOCKS;
228 247
229 trace_ext4_es_find_extent_enter(inode, es->start); 248 BUG_ON(es == NULL);
249 trace_ext4_es_find_delayed_extent_enter(inode, lblk);
230 250
231 read_lock(&EXT4_I(inode)->i_es_lock); 251 read_lock(&EXT4_I(inode)->i_es_lock);
232 tree = &EXT4_I(inode)->i_es_tree; 252 tree = &EXT4_I(inode)->i_es_tree;
233 253
234 /* find delay extent in cache firstly */ 254 /* find extent in cache firstly */
255 es->es_lblk = es->es_len = es->es_pblk = 0;
235 if (tree->cache_es) { 256 if (tree->cache_es) {
236 es1 = tree->cache_es; 257 es1 = tree->cache_es;
237 if (in_range(es->start, es1->start, es1->len)) { 258 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
238 es_debug("%u cached by [%u/%u)\n", 259 es_debug("%u cached by [%u/%u) %llu %llx\n",
239 es->start, es1->start, es1->len); 260 lblk, es1->es_lblk, es1->es_len,
261 ext4_es_pblock(es1), ext4_es_status(es1));
240 goto out; 262 goto out;
241 } 263 }
242 } 264 }
243 265
244 es->len = 0; 266 es1 = __es_tree_search(&tree->root, lblk);
245 es1 = __es_tree_search(&tree->root, es->start);
246 267
247out: 268out:
248 if (es1) { 269 if (es1 && !ext4_es_is_delayed(es1)) {
249 tree->cache_es = es1; 270 while ((node = rb_next(&es1->rb_node)) != NULL) {
250 es->start = es1->start;
251 es->len = es1->len;
252 node = rb_next(&es1->rb_node);
253 if (node) {
254 es1 = rb_entry(node, struct extent_status, rb_node); 271 es1 = rb_entry(node, struct extent_status, rb_node);
255 ret = es1->start; 272 if (ext4_es_is_delayed(es1))
273 break;
256 } 274 }
257 } 275 }
258 276
277 if (es1 && ext4_es_is_delayed(es1)) {
278 tree->cache_es = es1;
279 es->es_lblk = es1->es_lblk;
280 es->es_len = es1->es_len;
281 es->es_pblk = es1->es_pblk;
282 }
283
259 read_unlock(&EXT4_I(inode)->i_es_lock); 284 read_unlock(&EXT4_I(inode)->i_es_lock);
260 285
261 trace_ext4_es_find_extent_exit(inode, es, ret); 286 ext4_es_lru_add(inode);
262 return ret; 287 trace_ext4_es_find_delayed_extent_exit(inode, es);
263} 288}
264 289
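Because the tree now holds written, unwritten and hole extents as well, the lookup above may land on a non-delayed entry; the new code then steps through rb_next() until it reaches a delayed one, since only delayed extents are reported by this helper. A minimal sketch of that forward walk (an array stands in for the rbtree):

#include <stdio.h>

struct entry { int delayed; };

/* Starting at index i, return the index of the first delayed entry, or -1. */
static int first_delayed_from(const struct entry *v, int n, int i)
{
    while (i < n && !v[i].delayed)
        i++;                            /* rb_next() equivalent */
    return i < n ? i : -1;
}

int main(void)
{
    struct entry tree[] = { { 0 }, { 0 }, { 1 } };  /* written, hole, delayed */

    printf("%d\n", first_delayed_from(tree, 3, 0)); /* 2 */
    return 0;
}
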
265static struct extent_status * 290static struct extent_status *
266ext4_es_alloc_extent(ext4_lblk_t start, ext4_lblk_t len) 291ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
292 ext4_fsblk_t pblk)
267{ 293{
268 struct extent_status *es; 294 struct extent_status *es;
269 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); 295 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
270 if (es == NULL) 296 if (es == NULL)
271 return NULL; 297 return NULL;
272 es->start = start; 298 es->es_lblk = lblk;
273 es->len = len; 299 es->es_len = len;
300 es->es_pblk = pblk;
301
302 /*
303 * We don't count delayed extent because we never try to reclaim them
304 */
305 if (!ext4_es_is_delayed(es))
306 EXT4_I(inode)->i_es_lru_nr++;
307
274 return es; 308 return es;
275} 309}
276 310
277static void ext4_es_free_extent(struct extent_status *es) 311static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
278{ 312{
313 /* Decrease the lru counter when this es is not delayed */
314 if (!ext4_es_is_delayed(es)) {
315 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
316 EXT4_I(inode)->i_es_lru_nr--;
317 }
318
279 kmem_cache_free(ext4_es_cachep, es); 319 kmem_cache_free(ext4_es_cachep, es);
280} 320}
281 321
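i_es_lru_nr counts only the entries the shrinker may reclaim: it is incremented when a non-delayed extent is allocated and decremented when one is freed, while delayed extents are never counted because they must stay for fiemap, bigalloc and SEEK_DATA/SEEK_HOLE. A standalone sketch of that bookkeeping (the enum and names are illustrative):

#include <assert.h>
#include <stdio.h>

enum es_type { ES_WRITTEN, ES_UNWRITTEN, ES_HOLE, ES_DELAYED };

static unsigned long lru_nr;    /* stands in for EXT4_I(inode)->i_es_lru_nr */

static void account_alloc(enum es_type t)
{
    if (t != ES_DELAYED)
        lru_nr++;               /* reclaimable entry */
}

static void account_free(enum es_type t)
{
    if (t != ES_DELAYED) {
        assert(lru_nr != 0);
        lru_nr--;
    }
}

int main(void)
{
    account_alloc(ES_WRITTEN);
    account_alloc(ES_DELAYED);  /* not counted: never reclaimed */
    account_free(ES_WRITTEN);
    printf("reclaimable entries: %lu\n", lru_nr);   /* 0 */
    return 0;
}
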
322/*
323 * Check whether or not two extents can be merged
324 * Condition:
325 * - logical block number is contiguous
326 * - physical block number is contiguous
327 * - status is equal
328 */
329static int ext4_es_can_be_merged(struct extent_status *es1,
330 struct extent_status *es2)
331{
332 if (es1->es_lblk + es1->es_len != es2->es_lblk)
333 return 0;
334
335 if (ext4_es_status(es1) != ext4_es_status(es2))
336 return 0;
337
338 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
339 (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
340 return 0;
341
342 return 1;
343}
344
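The helpers used in this file (ext4_es_status(), ext4_es_pblock() and ext4_es_store_pblock()) suggest that the status bits share the 64-bit es_pblk field with the physical block number. A generic sketch of that kind of packing (the shift, mask and status values below are illustrative, not ext4's actual bit layout):

#include <stdio.h>
#include <stdint.h>

#define ES_SHIFT    60                              /* assumed: top bits hold status */
#define ES_MASK     (~((1ULL << ES_SHIFT) - 1))

static uint64_t es_store(uint64_t pblk, uint64_t status)
{
    return (pblk & ~ES_MASK) | (status << ES_SHIFT);
}

static uint64_t es_pblock(uint64_t v) { return v & ~ES_MASK; }
static uint64_t es_status(uint64_t v) { return v >> ES_SHIFT; }

int main(void)
{
    uint64_t v = es_store(123456789ULL, 0x4);       /* 0x4: say, "delayed" */

    printf("pblk=%llu status=%llx\n",
           (unsigned long long)es_pblock(v),
           (unsigned long long)es_status(v));
    return 0;
}
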
282static struct extent_status * 345static struct extent_status *
283ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es) 346ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
284{ 347{
348 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
285 struct extent_status *es1; 349 struct extent_status *es1;
286 struct rb_node *node; 350 struct rb_node *node;
287 351
@@ -290,10 +354,10 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
290 return es; 354 return es;
291 355
292 es1 = rb_entry(node, struct extent_status, rb_node); 356 es1 = rb_entry(node, struct extent_status, rb_node);
293 if (es->start == extent_status_end(es1) + 1) { 357 if (ext4_es_can_be_merged(es1, es)) {
294 es1->len += es->len; 358 es1->es_len += es->es_len;
295 rb_erase(&es->rb_node, &tree->root); 359 rb_erase(&es->rb_node, &tree->root);
296 ext4_es_free_extent(es); 360 ext4_es_free_extent(inode, es);
297 es = es1; 361 es = es1;
298 } 362 }
299 363
@@ -301,8 +365,9 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
301} 365}
302 366
303static struct extent_status * 367static struct extent_status *
304ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es) 368ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
305{ 369{
370 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
306 struct extent_status *es1; 371 struct extent_status *es1;
307 struct rb_node *node; 372 struct rb_node *node;
308 373
@@ -311,69 +376,57 @@ ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
311 return es; 376 return es;
312 377
313 es1 = rb_entry(node, struct extent_status, rb_node); 378 es1 = rb_entry(node, struct extent_status, rb_node);
314 if (es1->start == extent_status_end(es) + 1) { 379 if (ext4_es_can_be_merged(es, es1)) {
315 es->len += es1->len; 380 es->es_len += es1->es_len;
316 rb_erase(node, &tree->root); 381 rb_erase(node, &tree->root);
317 ext4_es_free_extent(es1); 382 ext4_es_free_extent(inode, es1);
318 } 383 }
319 384
320 return es; 385 return es;
321} 386}
322 387
323static int __es_insert_extent(struct ext4_es_tree *tree, ext4_lblk_t offset, 388static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
324 ext4_lblk_t len)
325{ 389{
390 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
326 struct rb_node **p = &tree->root.rb_node; 391 struct rb_node **p = &tree->root.rb_node;
327 struct rb_node *parent = NULL; 392 struct rb_node *parent = NULL;
328 struct extent_status *es; 393 struct extent_status *es;
329 ext4_lblk_t end = offset + len - 1;
330
331 BUG_ON(end < offset);
332 es = tree->cache_es;
333 if (es && offset == (extent_status_end(es) + 1)) {
334 es_debug("cached by [%u/%u)\n", es->start, es->len);
335 es->len += len;
336 es = ext4_es_try_to_merge_right(tree, es);
337 goto out;
338 } else if (es && es->start == end + 1) {
339 es_debug("cached by [%u/%u)\n", es->start, es->len);
340 es->start = offset;
341 es->len += len;
342 es = ext4_es_try_to_merge_left(tree, es);
343 goto out;
344 } else if (es && es->start <= offset &&
345 end <= extent_status_end(es)) {
346 es_debug("cached by [%u/%u)\n", es->start, es->len);
347 goto out;
348 }
349 394
350 while (*p) { 395 while (*p) {
351 parent = *p; 396 parent = *p;
352 es = rb_entry(parent, struct extent_status, rb_node); 397 es = rb_entry(parent, struct extent_status, rb_node);
353 398
354 if (offset < es->start) { 399 if (newes->es_lblk < es->es_lblk) {
355 if (es->start == end + 1) { 400 if (ext4_es_can_be_merged(newes, es)) {
356 es->start = offset; 401 /*
357 es->len += len; 402 * Here we can modify es_lblk directly
358 es = ext4_es_try_to_merge_left(tree, es); 403 * because it isn't overlapped.
404 */
405 es->es_lblk = newes->es_lblk;
406 es->es_len += newes->es_len;
407 if (ext4_es_is_written(es) ||
408 ext4_es_is_unwritten(es))
409 ext4_es_store_pblock(es,
410 newes->es_pblk);
411 es = ext4_es_try_to_merge_left(inode, es);
359 goto out; 412 goto out;
360 } 413 }
361 p = &(*p)->rb_left; 414 p = &(*p)->rb_left;
362 } else if (offset > extent_status_end(es)) { 415 } else if (newes->es_lblk > ext4_es_end(es)) {
363 if (offset == extent_status_end(es) + 1) { 416 if (ext4_es_can_be_merged(es, newes)) {
364 es->len += len; 417 es->es_len += newes->es_len;
365 es = ext4_es_try_to_merge_right(tree, es); 418 es = ext4_es_try_to_merge_right(inode, es);
366 goto out; 419 goto out;
367 } 420 }
368 p = &(*p)->rb_right; 421 p = &(*p)->rb_right;
369 } else { 422 } else {
370 if (extent_status_end(es) <= end) 423 BUG_ON(1);
371 es->len = offset - es->start + len; 424 return -EINVAL;
372 goto out;
373 } 425 }
374 } 426 }
375 427
376 es = ext4_es_alloc_extent(offset, len); 428 es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
429 newes->es_pblk);
377 if (!es) 430 if (!es)
378 return -ENOMEM; 431 return -ENOMEM;
379 rb_link_node(&es->rb_node, parent, p); 432 rb_link_node(&es->rb_node, parent, p);
@@ -385,85 +438,166 @@ out:
385} 438}
386 439
387/* 440/*
388 * ext4_es_insert_extent() adds a space to a delayed extent tree. 441 * ext4_es_insert_extent() adds a space to an extent status tree.
389 * Caller holds inode->i_es_lock.
390 * 442 *
391 * ext4_es_insert_extent is called by ext4_da_write_begin and 443 * ext4_es_insert_extent is called by ext4_da_write_begin and
392 * ext4_es_remove_extent. 444 * ext4_es_remove_extent.
393 * 445 *
394 * Return 0 on success, error code on failure. 446 * Return 0 on success, error code on failure.
395 */ 447 */
396int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t offset, 448int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
397 ext4_lblk_t len) 449 ext4_lblk_t len, ext4_fsblk_t pblk,
450 unsigned long long status)
398{ 451{
399 struct ext4_es_tree *tree; 452 struct extent_status newes;
453 ext4_lblk_t end = lblk + len - 1;
400 int err = 0; 454 int err = 0;
401 455
402 trace_ext4_es_insert_extent(inode, offset, len); 456 es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
403 es_debug("add [%u/%u) to extent status tree of inode %lu\n", 457 lblk, len, pblk, status, inode->i_ino);
404 offset, len, inode->i_ino); 458
459 if (!len)
460 return 0;
461
462 BUG_ON(end < lblk);
463
464 newes.es_lblk = lblk;
465 newes.es_len = len;
466 ext4_es_store_pblock(&newes, pblk);
467 ext4_es_store_status(&newes, status);
468 trace_ext4_es_insert_extent(inode, &newes);
405 469
406 write_lock(&EXT4_I(inode)->i_es_lock); 470 write_lock(&EXT4_I(inode)->i_es_lock);
407 tree = &EXT4_I(inode)->i_es_tree; 471 err = __es_remove_extent(inode, lblk, end);
408 err = __es_insert_extent(tree, offset, len); 472 if (err != 0)
473 goto error;
474 err = __es_insert_extent(inode, &newes);
475
476error:
409 write_unlock(&EXT4_I(inode)->i_es_lock); 477 write_unlock(&EXT4_I(inode)->i_es_lock);
410 478
479 ext4_es_lru_add(inode);
411 ext4_es_print_tree(inode); 480 ext4_es_print_tree(inode);
412 481
413 return err; 482 return err;
414} 483}
415 484
416/* 485/*
417 * ext4_es_remove_extent() removes a space from a delayed extent tree. 486 * ext4_es_lookup_extent() looks up an extent in the extent status tree.
418 * Caller holds inode->i_es_lock.
419 * 487 *
420 * Return 0 on success, error code on failure. 488 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
489 *
490 * Return: 1 if found, 0 if not
421 */ 491 */
422int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset, 492int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
423 ext4_lblk_t len) 493 struct extent_status *es)
424{ 494{
425 struct rb_node *node;
426 struct ext4_es_tree *tree; 495 struct ext4_es_tree *tree;
496 struct extent_status *es1 = NULL;
497 struct rb_node *node;
498 int found = 0;
499
500 trace_ext4_es_lookup_extent_enter(inode, lblk);
501 es_debug("lookup extent in block %u\n", lblk);
502
503 tree = &EXT4_I(inode)->i_es_tree;
504 read_lock(&EXT4_I(inode)->i_es_lock);
505
506 /* find extent in the cache first */
507 es->es_lblk = es->es_len = es->es_pblk = 0;
508 if (tree->cache_es) {
509 es1 = tree->cache_es;
510 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
511 es_debug("%u cached by [%u/%u)\n",
512 lblk, es1->es_lblk, es1->es_len);
513 found = 1;
514 goto out;
515 }
516 }
517
518 node = tree->root.rb_node;
519 while (node) {
520 es1 = rb_entry(node, struct extent_status, rb_node);
521 if (lblk < es1->es_lblk)
522 node = node->rb_left;
523 else if (lblk > ext4_es_end(es1))
524 node = node->rb_right;
525 else {
526 found = 1;
527 break;
528 }
529 }
530
531out:
532 if (found) {
533 BUG_ON(!es1);
534 es->es_lblk = es1->es_lblk;
535 es->es_len = es1->es_len;
536 es->es_pblk = es1->es_pblk;
537 }
538
539 read_unlock(&EXT4_I(inode)->i_es_lock);
540
541 ext4_es_lru_add(inode);
542 trace_ext4_es_lookup_extent_exit(inode, es, found);
543 return found;
544}
545
546static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
547 ext4_lblk_t end)
548{
549 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
550 struct rb_node *node;
427 struct extent_status *es; 551 struct extent_status *es;
428 struct extent_status orig_es; 552 struct extent_status orig_es;
429 ext4_lblk_t len1, len2, end; 553 ext4_lblk_t len1, len2;
554 ext4_fsblk_t block;
430 int err = 0; 555 int err = 0;
431 556
432 trace_ext4_es_remove_extent(inode, offset, len); 557 es = __es_tree_search(&tree->root, lblk);
433 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
434 offset, len, inode->i_ino);
435
436 end = offset + len - 1;
437 BUG_ON(end < offset);
438 write_lock(&EXT4_I(inode)->i_es_lock);
439 tree = &EXT4_I(inode)->i_es_tree;
440 es = __es_tree_search(&tree->root, offset);
441 if (!es) 558 if (!es)
442 goto out; 559 goto out;
443 if (es->start > end) 560 if (es->es_lblk > end)
444 goto out; 561 goto out;
445 562
446 /* Simply invalidate cache_es. */ 563 /* Simply invalidate cache_es. */
447 tree->cache_es = NULL; 564 tree->cache_es = NULL;
448 565
449 orig_es.start = es->start; 566 orig_es.es_lblk = es->es_lblk;
450 orig_es.len = es->len; 567 orig_es.es_len = es->es_len;
451 len1 = offset > es->start ? offset - es->start : 0; 568 orig_es.es_pblk = es->es_pblk;
452 len2 = extent_status_end(es) > end ? 569
453 extent_status_end(es) - end : 0; 570 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
571 len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
454 if (len1 > 0) 572 if (len1 > 0)
455 es->len = len1; 573 es->es_len = len1;
456 if (len2 > 0) { 574 if (len2 > 0) {
457 if (len1 > 0) { 575 if (len1 > 0) {
458 err = __es_insert_extent(tree, end + 1, len2); 576 struct extent_status newes;
577
578 newes.es_lblk = end + 1;
579 newes.es_len = len2;
580 if (ext4_es_is_written(&orig_es) ||
581 ext4_es_is_unwritten(&orig_es)) {
582 block = ext4_es_pblock(&orig_es) +
583 orig_es.es_len - len2;
584 ext4_es_store_pblock(&newes, block);
585 }
586 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
587 err = __es_insert_extent(inode, &newes);
459 if (err) { 588 if (err) {
460 es->start = orig_es.start; 589 es->es_lblk = orig_es.es_lblk;
461 es->len = orig_es.len; 590 es->es_len = orig_es.es_len;
462 goto out; 591 goto out;
463 } 592 }
464 } else { 593 } else {
465 es->start = end + 1; 594 es->es_lblk = end + 1;
466 es->len = len2; 595 es->es_len = len2;
596 if (ext4_es_is_written(es) ||
597 ext4_es_is_unwritten(es)) {
598 block = orig_es.es_pblk + orig_es.es_len - len2;
599 ext4_es_store_pblock(es, block);
600 }
467 } 601 }
468 goto out; 602 goto out;
469 } 603 }
@@ -476,10 +610,10 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
476 es = NULL; 610 es = NULL;
477 } 611 }
478 612
479 while (es && extent_status_end(es) <= end) { 613 while (es && ext4_es_end(es) <= end) {
480 node = rb_next(&es->rb_node); 614 node = rb_next(&es->rb_node);
481 rb_erase(&es->rb_node, &tree->root); 615 rb_erase(&es->rb_node, &tree->root);
482 ext4_es_free_extent(es); 616 ext4_es_free_extent(inode, es);
483 if (!node) { 617 if (!node) {
484 es = NULL; 618 es = NULL;
485 break; 619 break;
@@ -487,14 +621,183 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
487 es = rb_entry(node, struct extent_status, rb_node); 621 es = rb_entry(node, struct extent_status, rb_node);
488 } 622 }
489 623
490 if (es && es->start < end + 1) { 624 if (es && es->es_lblk < end + 1) {
491 len1 = extent_status_end(es) - end; 625 ext4_lblk_t orig_len = es->es_len;
492 es->start = end + 1; 626
493 es->len = len1; 627 len1 = ext4_es_end(es) - end;
628 es->es_lblk = end + 1;
629 es->es_len = len1;
630 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
631 block = es->es_pblk + orig_len - len1;
632 ext4_es_store_pblock(es, block);
633 }
494 } 634 }
495 635
496out: 636out:
637 return err;
638}
639
640/*
641 * ext4_es_remove_extent() removes a space from an extent status tree.
642 *
643 * Return 0 on success, error code on failure.
644 */
645int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
646 ext4_lblk_t len)
647{
648 ext4_lblk_t end;
649 int err = 0;
650
651 trace_ext4_es_remove_extent(inode, lblk, len);
652 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
653 lblk, len, inode->i_ino);
654
655 if (!len)
656 return err;
657
658 end = lblk + len - 1;
659 BUG_ON(end < lblk);
660
661 write_lock(&EXT4_I(inode)->i_es_lock);
662 err = __es_remove_extent(inode, lblk, end);
497 write_unlock(&EXT4_I(inode)->i_es_lock); 663 write_unlock(&EXT4_I(inode)->i_es_lock);
498 ext4_es_print_tree(inode); 664 ext4_es_print_tree(inode);
499 return err; 665 return err;
500} 666}
667
668static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
669{
670 struct ext4_sb_info *sbi = container_of(shrink,
671 struct ext4_sb_info, s_es_shrinker);
672 struct ext4_inode_info *ei;
673 struct list_head *cur, *tmp, scanned;
674 int nr_to_scan = sc->nr_to_scan;
675 int ret, nr_shrunk = 0;
676
677 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
678
679 if (!nr_to_scan)
680 return ext4_es_reclaim_extents_count(sbi->s_sb);
681
682 INIT_LIST_HEAD(&scanned);
683
684 spin_lock(&sbi->s_es_lru_lock);
685 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
686 list_move_tail(cur, &scanned);
687
688 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
689
690 read_lock(&ei->i_es_lock);
691 if (ei->i_es_lru_nr == 0) {
692 read_unlock(&ei->i_es_lock);
693 continue;
694 }
695 read_unlock(&ei->i_es_lock);
696
697 write_lock(&ei->i_es_lock);
698 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
699 write_unlock(&ei->i_es_lock);
700
701 nr_shrunk += ret;
702 nr_to_scan -= ret;
703 if (nr_to_scan == 0)
704 break;
705 }
706 list_splice_tail(&scanned, &sbi->s_es_lru);
707 spin_unlock(&sbi->s_es_lru_lock);
708 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
709
710 return ext4_es_reclaim_extents_count(sbi->s_sb);
711}
712
713void ext4_es_register_shrinker(struct super_block *sb)
714{
715 struct ext4_sb_info *sbi;
716
717 sbi = EXT4_SB(sb);
718 INIT_LIST_HEAD(&sbi->s_es_lru);
719 spin_lock_init(&sbi->s_es_lru_lock);
720 sbi->s_es_shrinker.shrink = ext4_es_shrink;
721 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
722 register_shrinker(&sbi->s_es_shrinker);
723}
724
725void ext4_es_unregister_shrinker(struct super_block *sb)
726{
727 unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
728}
729
730void ext4_es_lru_add(struct inode *inode)
731{
732 struct ext4_inode_info *ei = EXT4_I(inode);
733 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
734
735 spin_lock(&sbi->s_es_lru_lock);
736 if (list_empty(&ei->i_es_lru))
737 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
738 else
739 list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
740 spin_unlock(&sbi->s_es_lru_lock);
741}
742
743void ext4_es_lru_del(struct inode *inode)
744{
745 struct ext4_inode_info *ei = EXT4_I(inode);
746 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
747
748 spin_lock(&sbi->s_es_lru_lock);
749 if (!list_empty(&ei->i_es_lru))
750 list_del_init(&ei->i_es_lru);
751 spin_unlock(&sbi->s_es_lru_lock);
752}
753
754static int ext4_es_reclaim_extents_count(struct super_block *sb)
755{
756 struct ext4_sb_info *sbi = EXT4_SB(sb);
757 struct ext4_inode_info *ei;
758 struct list_head *cur;
759 int nr_cached = 0;
760
761 spin_lock(&sbi->s_es_lru_lock);
762 list_for_each(cur, &sbi->s_es_lru) {
763 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
764 read_lock(&ei->i_es_lock);
765 nr_cached += ei->i_es_lru_nr;
766 read_unlock(&ei->i_es_lock);
767 }
768 spin_unlock(&sbi->s_es_lru_lock);
769 trace_ext4_es_reclaim_extents_count(sb, nr_cached);
770 return nr_cached;
771}
772
773static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
774 int nr_to_scan)
775{
776 struct inode *inode = &ei->vfs_inode;
777 struct ext4_es_tree *tree = &ei->i_es_tree;
778 struct rb_node *node;
779 struct extent_status *es;
780 int nr_shrunk = 0;
781
782 if (ei->i_es_lru_nr == 0)
783 return 0;
784
785 node = rb_first(&tree->root);
786 while (node != NULL) {
787 es = rb_entry(node, struct extent_status, rb_node);
788 node = rb_next(&es->rb_node);
789 /*
790 * We can't reclaim delayed extents from the status tree because
791 * fiemap, bigalloc, and seek_data/hole need to use them.
792 */
793 if (!ext4_es_is_delayed(es)) {
794 rb_erase(&es->rb_node, &tree->root);
795 ext4_es_free_extent(inode, es);
796 nr_shrunk++;
797 if (--nr_to_scan == 0)
798 break;
799 }
800 }
801 tree->cache_es = NULL;
802 return nr_shrunk;
803}
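
For readers tracing the new __es_remove_extent() above: when a removal range lands inside a cached extent, the len1/len2 arithmetic keeps a head piece and a tail piece, and the tail's physical start is recomputed from the original mapping (block = orig pblk + orig len - len2). A small user-space sketch of just that arithmetic, with illustrative names rather than the kernel types (not kernel code):

#include <stdint.h>
#include <stdio.h>

struct es { uint32_t lblk, len; uint64_t pblk; };	/* toy extent_status */

/* split extent e around the removed range [lblk, end], as __es_remove_extent() does */
static void split(struct es e, uint32_t lblk, uint32_t end)
{
	uint32_t es_end = e.lblk + e.len - 1;
	uint32_t len1 = lblk > e.lblk ? lblk - e.lblk : 0;
	uint32_t len2 = es_end > end ? es_end - end : 0;

	if (len1)
		printf("head: [%u/%u) at pblk %llu\n", e.lblk, len1,
		       (unsigned long long)e.pblk);
	if (len2)
		printf("tail: [%u/%u) at pblk %llu\n", end + 1, len2,
		       (unsigned long long)(e.pblk + e.len - len2));
}

int main(void)
{
	struct es e = { 100, 20, 5000 };	/* logical 100..119 mapped at pblk 5000 */

	split(e, 105, 109);	/* prints head [100/5) at 5000 and tail [110/10) at 5010 */
	return 0;
}
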
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 077f82db092a..f190dfe969da 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -20,10 +20,24 @@
20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
21#endif 21#endif
22 22
23/*
24 * These flags live in the high bits of extent_status.es_pblk
25 */
26#define EXTENT_STATUS_WRITTEN (1ULL << 63)
27#define EXTENT_STATUS_UNWRITTEN (1ULL << 62)
28#define EXTENT_STATUS_DELAYED (1ULL << 61)
29#define EXTENT_STATUS_HOLE (1ULL << 60)
30
31#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
32 EXTENT_STATUS_UNWRITTEN | \
33 EXTENT_STATUS_DELAYED | \
34 EXTENT_STATUS_HOLE)
35
23struct extent_status { 36struct extent_status {
24 struct rb_node rb_node; 37 struct rb_node rb_node;
25 ext4_lblk_t start; /* first block extent covers */ 38 ext4_lblk_t es_lblk; /* first logical block extent covers */
26 ext4_lblk_t len; /* length of extent in block */ 39 ext4_lblk_t es_len; /* length of extent in block */
40 ext4_fsblk_t es_pblk; /* first physical block */
27}; 41};
28 42
29struct ext4_es_tree { 43struct ext4_es_tree {
@@ -35,11 +49,69 @@ extern int __init ext4_init_es(void);
35extern void ext4_exit_es(void); 49extern void ext4_exit_es(void);
36extern void ext4_es_init_tree(struct ext4_es_tree *tree); 50extern void ext4_es_init_tree(struct ext4_es_tree *tree);
37 51
38extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start, 52extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
53 ext4_lblk_t len, ext4_fsblk_t pblk,
54 unsigned long long status);
55extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
39 ext4_lblk_t len); 56 ext4_lblk_t len);
40extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start, 57extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
41 ext4_lblk_t len); 58 struct extent_status *es);
42extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, 59extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
43 struct extent_status *es); 60 struct extent_status *es);
61
62static inline int ext4_es_is_written(struct extent_status *es)
63{
64 return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
65}
66
67static inline int ext4_es_is_unwritten(struct extent_status *es)
68{
69 return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
70}
71
72static inline int ext4_es_is_delayed(struct extent_status *es)
73{
74 return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
75}
76
77static inline int ext4_es_is_hole(struct extent_status *es)
78{
79 return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
80}
81
82static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
83{
84 return (es->es_pblk & EXTENT_STATUS_FLAGS);
85}
86
87static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
88{
89 return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
90}
91
92static inline void ext4_es_store_pblock(struct extent_status *es,
93 ext4_fsblk_t pb)
94{
95 ext4_fsblk_t block;
96
97 block = (pb & ~EXTENT_STATUS_FLAGS) |
98 (es->es_pblk & EXTENT_STATUS_FLAGS);
99 es->es_pblk = block;
100}
101
102static inline void ext4_es_store_status(struct extent_status *es,
103 unsigned long long status)
104{
105 ext4_fsblk_t block;
106
107 block = (status & EXTENT_STATUS_FLAGS) |
108 (es->es_pblk & ~EXTENT_STATUS_FLAGS);
109 es->es_pblk = block;
110}
111
112extern void ext4_es_register_shrinker(struct super_block *sb);
113extern void ext4_es_unregister_shrinker(struct super_block *sb);
114extern void ext4_es_lru_add(struct inode *inode);
115extern void ext4_es_lru_del(struct inode *inode);
44 116
45#endif /* _EXT4_EXTENTS_STATUS_H */ 117#endif /* _EXT4_EXTENTS_STATUS_H */
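
The header above stores the four EXTENT_STATUS_* flags in the top bits of es_pblk, so a single 64-bit field carries both the physical block number and the extent state. A minimal user-space sketch of that packing, mirroring ext4_es_store_pblock()/ext4_es_store_status()/ext4_es_pblock() (assuming a 64-bit physical block type, as in the header; not kernel code):

#include <stdint.h>
#include <stdio.h>

#define ES_WRITTEN	(1ULL << 63)
#define ES_UNWRITTEN	(1ULL << 62)
#define ES_DELAYED	(1ULL << 61)
#define ES_HOLE		(1ULL << 60)
#define ES_FLAGS	(ES_WRITTEN | ES_UNWRITTEN | ES_DELAYED | ES_HOLE)

/* store a physical block number, preserving the flag bits */
static uint64_t store_pblock(uint64_t es_pblk, uint64_t pb)
{
	return (pb & ~ES_FLAGS) | (es_pblk & ES_FLAGS);
}

/* store the status flags, preserving the physical block number */
static uint64_t store_status(uint64_t es_pblk, uint64_t status)
{
	return (status & ES_FLAGS) | (es_pblk & ~ES_FLAGS);
}

int main(void)
{
	uint64_t es_pblk = 0;

	es_pblk = store_pblock(es_pblk, 123456);
	es_pblk = store_status(es_pblk, ES_WRITTEN);

	printf("pblk=%llu written=%d delayed=%d\n",
	       (unsigned long long)(es_pblk & ~ES_FLAGS),
	       (int)((es_pblk & ES_WRITTEN) != 0),
	       (int)((es_pblk & ES_DELAYED) != 0));
	return 0;
}
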
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 405565a62277..64848b595b24 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -167,7 +167,7 @@ static ssize_t
167ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 167ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
168 unsigned long nr_segs, loff_t pos) 168 unsigned long nr_segs, loff_t pos)
169{ 169{
170 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 170 struct inode *inode = file_inode(iocb->ki_filp);
171 ssize_t ret; 171 ssize_t ret;
172 172
173 /* 173 /*
@@ -240,7 +240,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
240 handle_t *handle; 240 handle_t *handle;
241 int err; 241 int err;
242 242
243 handle = ext4_journal_start_sb(sb, 1); 243 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
244 if (IS_ERR(handle)) 244 if (IS_ERR(handle))
245 return PTR_ERR(handle); 245 return PTR_ERR(handle);
246 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 246 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
@@ -464,10 +464,8 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
464 * If there is a delay extent at this offset, 464 * If there is a delay extent at this offset,
465 * it will be treated as data. 465 * it will be treated as data.
466 */ 466 */
467 es.start = last; 467 ext4_es_find_delayed_extent(inode, last, &es);
468 (void)ext4_es_find_extent(inode, &es); 468 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
469 if (last >= es.start &&
470 last < es.start + es.len) {
471 if (last != start) 469 if (last != start)
472 dataoff = last << blkbits; 470 dataoff = last << blkbits;
473 break; 471 break;
@@ -549,11 +547,9 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
549 * If there is a delay extent at this offset, 547 * If there is a delay extent at this offset,
550 * we will skip this extent. 548 * we will skip this extent.
551 */ 549 */
552 es.start = last; 550 ext4_es_find_delayed_extent(inode, last, &es);
553 (void)ext4_es_find_extent(inode, &es); 551 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
554 if (last >= es.start && 552 last = es.es_lblk + es.es_len;
555 last < es.start + es.len) {
556 last = es.start + es.len;
557 holeoff = last << blkbits; 553 holeoff = last << blkbits;
558 continue; 554 continue;
559 } 555 }
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index fa8e4911d354..3d586f02883e 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -155,11 +155,11 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
155 /* Check to see if the seed is all zero's */ 155 /* Check to see if the seed is all zero's */
156 if (hinfo->seed) { 156 if (hinfo->seed) {
157 for (i = 0; i < 4; i++) { 157 for (i = 0; i < 4; i++) {
158 if (hinfo->seed[i]) 158 if (hinfo->seed[i]) {
159 memcpy(buf, hinfo->seed, sizeof(buf));
159 break; 160 break;
161 }
160 } 162 }
161 if (i < 4)
162 memcpy(buf, hinfo->seed, sizeof(buf));
163 } 163 }
164 164
165 switch (hinfo->hash_version) { 165 switch (hinfo->hash_version) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3f32c8012447..32fd2b9075dd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -634,8 +634,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
634 * For other inodes, search forward from the parent directory's block 634 * For other inodes, search forward from the parent directory's block
635 * group to find a free inode. 635 * group to find a free inode.
636 */ 636 */
637struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, 637struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
638 const struct qstr *qstr, __u32 goal, uid_t *owner) 638 umode_t mode, const struct qstr *qstr,
639 __u32 goal, uid_t *owner, int handle_type,
640 unsigned int line_no, int nblocks)
639{ 641{
640 struct super_block *sb; 642 struct super_block *sb;
641 struct buffer_head *inode_bitmap_bh = NULL; 643 struct buffer_head *inode_bitmap_bh = NULL;
@@ -725,6 +727,15 @@ repeat_in_this_group:
725 "inode=%lu", ino + 1); 727 "inode=%lu", ino + 1);
726 continue; 728 continue;
727 } 729 }
730 if (!handle) {
731 BUG_ON(nblocks <= 0);
732 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
733 handle_type, nblocks);
734 if (IS_ERR(handle)) {
735 err = PTR_ERR(handle);
736 goto fail;
737 }
738 }
728 BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 739 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
729 err = ext4_journal_get_write_access(handle, inode_bitmap_bh); 740 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
730 if (err) 741 if (err)
@@ -1017,17 +1028,17 @@ iget_failed:
1017 inode = NULL; 1028 inode = NULL;
1018bad_orphan: 1029bad_orphan:
1019 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); 1030 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1020 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1031 printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1021 bit, (unsigned long long)bitmap_bh->b_blocknr, 1032 bit, (unsigned long long)bitmap_bh->b_blocknr,
1022 ext4_test_bit(bit, bitmap_bh->b_data)); 1033 ext4_test_bit(bit, bitmap_bh->b_data));
1023 printk(KERN_NOTICE "inode=%p\n", inode); 1034 printk(KERN_WARNING "inode=%p\n", inode);
1024 if (inode) { 1035 if (inode) {
1025 printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", 1036 printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
1026 is_bad_inode(inode)); 1037 is_bad_inode(inode));
1027 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 1038 printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
1028 NEXT_ORPHAN(inode)); 1039 NEXT_ORPHAN(inode));
1029 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 1040 printk(KERN_WARNING "max_ino=%lu\n", max_ino);
1030 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); 1041 printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
1031 /* Avoid freeing blocks if we got a bad deleted inode */ 1042 /* Avoid freeing blocks if we got a bad deleted inode */
1032 if (inode->i_nlink == 0) 1043 if (inode->i_nlink == 0)
1033 inode->i_blocks = 0; 1044 inode->i_blocks = 0;
@@ -1137,7 +1148,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1137 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) 1148 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1138 goto out; 1149 goto out;
1139 1150
1140 handle = ext4_journal_start_sb(sb, 1); 1151 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
1141 if (IS_ERR(handle)) { 1152 if (IS_ERR(handle)) {
1142 ret = PTR_ERR(handle); 1153 ret = PTR_ERR(handle);
1143 goto out; 1154 goto out;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 20862f96e8ae..c541ab8b64dd 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -146,6 +146,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
146 struct super_block *sb = inode->i_sb; 146 struct super_block *sb = inode->i_sb;
147 Indirect *p = chain; 147 Indirect *p = chain;
148 struct buffer_head *bh; 148 struct buffer_head *bh;
149 int ret = -EIO;
149 150
150 *err = 0; 151 *err = 0;
151 /* i_data is not going away, no lock needed */ 152 /* i_data is not going away, no lock needed */
@@ -154,8 +155,10 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
154 goto no_block; 155 goto no_block;
155 while (--depth) { 156 while (--depth) {
156 bh = sb_getblk(sb, le32_to_cpu(p->key)); 157 bh = sb_getblk(sb, le32_to_cpu(p->key));
157 if (unlikely(!bh)) 158 if (unlikely(!bh)) {
159 ret = -ENOMEM;
158 goto failure; 160 goto failure;
161 }
159 162
160 if (!bh_uptodate_or_lock(bh)) { 163 if (!bh_uptodate_or_lock(bh)) {
161 if (bh_submit_read(bh) < 0) { 164 if (bh_submit_read(bh) < 0) {
@@ -177,7 +180,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
177 return NULL; 180 return NULL;
178 181
179failure: 182failure:
180 *err = -EIO; 183 *err = ret;
181no_block: 184no_block:
182 return p; 185 return p;
183} 186}
@@ -355,9 +358,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
355 * for the first direct block 358 * for the first direct block
356 */ 359 */
357 new_blocks[index] = current_block; 360 new_blocks[index] = current_block;
358 printk(KERN_INFO "%s returned more blocks than " 361 WARN(1, KERN_INFO "%s returned more blocks than "
359 "requested\n", __func__); 362 "requested\n", __func__);
360 WARN_ON(1);
361 break; 363 break;
362 } 364 }
363 } 365 }
@@ -471,7 +473,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
471 */ 473 */
472 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 474 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
473 if (unlikely(!bh)) { 475 if (unlikely(!bh)) {
474 err = -EIO; 476 err = -ENOMEM;
475 goto failed; 477 goto failed;
476 } 478 }
477 479
@@ -789,7 +791,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
789 791
790 if (final_size > inode->i_size) { 792 if (final_size > inode->i_size) {
791 /* Credits for sb + inode write */ 793 /* Credits for sb + inode write */
792 handle = ext4_journal_start(inode, 2); 794 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
793 if (IS_ERR(handle)) { 795 if (IS_ERR(handle)) {
794 ret = PTR_ERR(handle); 796 ret = PTR_ERR(handle);
795 goto out; 797 goto out;
@@ -849,7 +851,7 @@ locked:
849 int err; 851 int err;
850 852
851 /* Credits for sb + inode write */ 853 /* Credits for sb + inode write */
852 handle = ext4_journal_start(inode, 2); 854 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
853 if (IS_ERR(handle)) { 855 if (IS_ERR(handle)) {
854 /* This is really bad luck. We've written the data 856 /* This is really bad luck. We've written the data
855 * but cannot extend i_size. Bail out and pretend 857 * but cannot extend i_size. Bail out and pretend
@@ -948,7 +950,8 @@ static handle_t *start_transaction(struct inode *inode)
948{ 950{
949 handle_t *result; 951 handle_t *result;
950 952
951 result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); 953 result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
954 ext4_blocks_for_truncate(inode));
952 if (!IS_ERR(result)) 955 if (!IS_ERR(result))
953 return result; 956 return result;
954 957
@@ -1515,3 +1518,243 @@ out_stop:
1515 trace_ext4_truncate_exit(inode); 1518 trace_ext4_truncate_exit(inode);
1516} 1519}
1517 1520
1521static int free_hole_blocks(handle_t *handle, struct inode *inode,
1522 struct buffer_head *parent_bh, __le32 *i_data,
1523 int level, ext4_lblk_t first,
1524 ext4_lblk_t count, int max)
1525{
1526 struct buffer_head *bh = NULL;
1527 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1528 int ret = 0;
1529 int i, inc;
1530 ext4_lblk_t offset;
1531 __le32 blk;
1532
1533 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
1534 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
1535 if (offset >= count + first)
1536 break;
1537 if (*i_data == 0 || (offset + inc) <= first)
1538 continue;
1539 blk = *i_data;
1540 if (level > 0) {
1541 ext4_lblk_t first2;
1542 bh = sb_bread(inode->i_sb, blk);
1543 if (!bh) {
1544 EXT4_ERROR_INODE_BLOCK(inode, blk,
1545 "Read failure");
1546 return -EIO;
1547 }
1548 first2 = (first > offset) ? first - offset : 0;
1549 ret = free_hole_blocks(handle, inode, bh,
1550 (__le32 *)bh->b_data, level - 1,
1551 first2, count - offset,
1552 inode->i_sb->s_blocksize >> 2);
1553 if (ret) {
1554 brelse(bh);
1555 goto err;
1556 }
1557 }
1558 if (level == 0 ||
1559 (bh && all_zeroes((__le32 *)bh->b_data,
1560 (__le32 *)bh->b_data + addr_per_block))) {
1561 ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
1562 *i_data = 0;
1563 }
1564 brelse(bh);
1565 bh = NULL;
1566 }
1567
1568err:
1569 return ret;
1570}
1571
1572static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
1573 ext4_lblk_t first, ext4_lblk_t stop)
1574{
1575 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1576 int level, ret = 0;
1577 int num = EXT4_NDIR_BLOCKS;
1578 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
1579 __le32 *i_data = EXT4_I(inode)->i_data;
1580
1581 count = stop - first;
1582 for (level = 0; level < 4; level++, max *= addr_per_block) {
1583 if (first < max) {
1584 ret = free_hole_blocks(handle, inode, NULL, i_data,
1585 level, first, count, num);
1586 if (ret)
1587 goto err;
1588 if (count > max - first)
1589 count -= max - first;
1590 else
1591 break;
1592 first = 0;
1593 } else {
1594 first -= max;
1595 }
1596 i_data += num;
1597 if (level == 0) {
1598 num = 1;
1599 max = 1;
1600 }
1601 }
1602
1603err:
1604 return ret;
1605}
1606
1607int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
1608{
1609 struct inode *inode = file->f_path.dentry->d_inode;
1610 struct super_block *sb = inode->i_sb;
1611 ext4_lblk_t first_block, stop_block;
1612 struct address_space *mapping = inode->i_mapping;
1613 handle_t *handle = NULL;
1614 loff_t first_page, last_page, page_len;
1615 loff_t first_page_offset, last_page_offset;
1616 int err = 0;
1617
1618 /*
1619 * Write out all dirty pages to avoid race conditions
1620 * Then release them.
1621 */
1622 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
1623 err = filemap_write_and_wait_range(mapping,
1624 offset, offset + length - 1);
1625 if (err)
1626 return err;
1627 }
1628
1629 mutex_lock(&inode->i_mutex);
1630 /* It's not possible to punch a hole in an append-only file */
1631 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
1632 err = -EPERM;
1633 goto out_mutex;
1634 }
1635 if (IS_SWAPFILE(inode)) {
1636 err = -ETXTBSY;
1637 goto out_mutex;
1638 }
1639
1640 /* No need to punch hole beyond i_size */
1641 if (offset >= inode->i_size)
1642 goto out_mutex;
1643
1644 /*
1645 * If the hole extends beyond i_size, set the hole
1646 * to end after the page that contains i_size
1647 */
1648 if (offset + length > inode->i_size) {
1649 length = inode->i_size +
1650 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
1651 offset;
1652 }
1653
1654 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1655 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
1656
1657 first_page_offset = first_page << PAGE_CACHE_SHIFT;
1658 last_page_offset = last_page << PAGE_CACHE_SHIFT;
1659
1660 /* Now release the pages */
1661 if (last_page_offset > first_page_offset) {
1662 truncate_pagecache_range(inode, first_page_offset,
1663 last_page_offset - 1);
1664 }
1665
1666 /* Wait for all existing dio workers; newcomers will block on i_mutex */
1667 inode_dio_wait(inode);
1668
1669 handle = start_transaction(inode);
1670 if (IS_ERR(handle))
1671 goto out_mutex;
1672
1673 /*
1674 * Now we need to zero out the non-page-aligned data in the
1675 * pages at the start and tail of the hole, and unmap the buffer
1676 * heads for the block aligned regions of the page that were
1677 * completely zeroed.
1678 */
1679 if (first_page > last_page) {
1680 /*
1681 * If the file space being truncated is contained within a page
1682 * just zero out and unmap the middle of that page
1683 */
1684 err = ext4_discard_partial_page_buffers(handle,
1685 mapping, offset, length, 0);
1686 if (err)
1687 goto out;
1688 } else {
1689 /*
1690 * Zero out and unmap the partial page that contains
1691 * the start of the hole
1692 */
1693 page_len = first_page_offset - offset;
1694 if (page_len > 0) {
1695 err = ext4_discard_partial_page_buffers(handle, mapping,
1696 offset, page_len, 0);
1697 if (err)
1698 goto out;
1699 }
1700
1701 /*
1702 * Zero out and unmap the partial page that contains
1703 * the end of the hole
1704 */
1705 page_len = offset + length - last_page_offset;
1706 if (page_len > 0) {
1707 err = ext4_discard_partial_page_buffers(handle, mapping,
1708 last_page_offset, page_len, 0);
1709 if (err)
1710 goto out;
1711 }
1712 }
1713
1714 /*
1715 * If i_size is contained in the last page, we need to
1716 * unmap and zero the partial page after i_size
1717 */
1718 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
1719 inode->i_size % PAGE_CACHE_SIZE != 0) {
1720 page_len = PAGE_CACHE_SIZE -
1721 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1722 if (page_len > 0) {
1723 err = ext4_discard_partial_page_buffers(handle,
1724 mapping, inode->i_size, page_len, 0);
1725 if (err)
1726 goto out;
1727 }
1728 }
1729
1730 first_block = (offset + sb->s_blocksize - 1) >>
1731 EXT4_BLOCK_SIZE_BITS(sb);
1732 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
1733
1734 if (first_block >= stop_block)
1735 goto out;
1736
1737 down_write(&EXT4_I(inode)->i_data_sem);
1738 ext4_discard_preallocations(inode);
1739
1740 err = ext4_es_remove_extent(inode, first_block,
1741 stop_block - first_block);
1742 err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
1743
1744 ext4_discard_preallocations(inode);
1745
1746 if (IS_SYNC(inode))
1747 ext4_handle_sync(handle);
1748
1749 up_write(&EXT4_I(inode)->i_data_sem);
1750
1751out:
1752 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1753 ext4_mark_inode_dirty(handle, inode);
1754 ext4_journal_stop(handle);
1755
1756out_mutex:
1757 mutex_unlock(&inode->i_mutex);
1758
1759 return err;
1760}
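
As a quick check on the rounding used by ext4_ind_punch_hole() above: the hole is rounded inward to whole pages for the page-cache truncation and to whole blocks for the block freeing, so partial pages and blocks at either end are only zeroed, never released. A user-space sketch of the same arithmetic (assuming 4K pages and a 4K filesystem block; illustrative only, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT	12			/* assume 4K pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define BLKBITS		12			/* assume a 4K filesystem block */
#define BLKSIZE		(1UL << BLKBITS)

int main(void)
{
	unsigned long long offset = 5000, length = 20000;

	/* round the hole inward to page and block boundaries, as above */
	unsigned long long first_page = (offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long long last_page  = (offset + length) >> PAGE_SHIFT;
	unsigned long long first_block = (offset + BLKSIZE - 1) >> BLKBITS;
	unsigned long long stop_block  = (offset + length) >> BLKBITS;

	printf("pages  [%llu, %llu) released from the page cache\n",
	       first_page, last_page);
	printf("blocks [%llu, %llu) freed from the inode\n",
	       first_block, stop_block);
	return 0;
}
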
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 387c47c6cda9..c0fd1a123f7d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -545,7 +545,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
545 return ret; 545 return ret;
546 546
547retry: 547retry:
548 handle = ext4_journal_start(inode, needed_blocks); 548 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
549 if (IS_ERR(handle)) { 549 if (IS_ERR(handle)) {
550 ret = PTR_ERR(handle); 550 ret = PTR_ERR(handle);
551 handle = NULL; 551 handle = NULL;
@@ -657,7 +657,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
657 * The possible write could happen in the inode, 657 * The possible write could happen in the inode,
658 * so try to reserve the space in inode first. 658 * so try to reserve the space in inode first.
659 */ 659 */
660 handle = ext4_journal_start(inode, 1); 660 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
661 if (IS_ERR(handle)) { 661 if (IS_ERR(handle)) {
662 ret = PTR_ERR(handle); 662 ret = PTR_ERR(handle);
663 handle = NULL; 663 handle = NULL;
@@ -853,7 +853,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
853 if (ret) 853 if (ret)
854 return ret; 854 return ret;
855 855
856 handle = ext4_journal_start(inode, 1); 856 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
857 if (IS_ERR(handle)) { 857 if (IS_ERR(handle)) {
858 ret = PTR_ERR(handle); 858 ret = PTR_ERR(handle);
859 handle = NULL; 859 handle = NULL;
@@ -1188,7 +1188,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1188 1188
1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk);
1190 if (!data_bh) { 1190 if (!data_bh) {
1191 error = -EIO; 1191 error = -ENOMEM;
1192 goto out_restore; 1192 goto out_restore;
1193 } 1193 }
1194 1194
@@ -1298,7 +1298,7 @@ int ext4_read_inline_dir(struct file *filp,
1298 int i, stored; 1298 int i, stored;
1299 struct ext4_dir_entry_2 *de; 1299 struct ext4_dir_entry_2 *de;
1300 struct super_block *sb; 1300 struct super_block *sb;
1301 struct inode *inode = filp->f_path.dentry->d_inode; 1301 struct inode *inode = file_inode(filp);
1302 int ret, inline_size = 0; 1302 int ret, inline_size = 0;
1303 struct ext4_iloc iloc; 1303 struct ext4_iloc iloc;
1304 void *dir_buf = NULL; 1304 void *dir_buf = NULL;
@@ -1770,7 +1770,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
1770 1770
1771 1771
1772 needed_blocks = ext4_writepage_trans_blocks(inode); 1772 needed_blocks = ext4_writepage_trans_blocks(inode);
1773 handle = ext4_journal_start(inode, needed_blocks); 1773 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
1774 if (IS_ERR(handle)) 1774 if (IS_ERR(handle))
1775 return; 1775 return;
1776 1776
@@ -1862,7 +1862,7 @@ int ext4_convert_inline_data(struct inode *inode)
1862 if (error) 1862 if (error)
1863 return error; 1863 return error;
1864 1864
1865 handle = ext4_journal_start(inode, needed_blocks); 1865 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
1866 if (IS_ERR(handle)) { 1866 if (IS_ERR(handle)) {
1867 error = PTR_ERR(handle); 1867 error = PTR_ERR(handle);
1868 goto out_free; 1868 goto out_free;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..9ea0cde3fa9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -132,10 +132,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
132} 132}
133 133
134static void ext4_invalidatepage(struct page *page, unsigned long offset); 134static void ext4_invalidatepage(struct page *page, unsigned long offset);
135static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
136 struct buffer_head *bh_result, int create);
137static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
138static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
139static int __ext4_journalled_writepage(struct page *page, unsigned int len); 135static int __ext4_journalled_writepage(struct page *page, unsigned int len);
140static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 136static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
141static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 137static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -238,7 +234,8 @@ void ext4_evict_inode(struct inode *inode)
238 * protection against it 234 * protection against it
239 */ 235 */
240 sb_start_intwrite(inode->i_sb); 236 sb_start_intwrite(inode->i_sb);
241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); 237 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
238 ext4_blocks_for_truncate(inode)+3);
242 if (IS_ERR(handle)) { 239 if (IS_ERR(handle)) {
243 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 240 ext4_std_error(inode->i_sb, PTR_ERR(handle));
244 /* 241 /*
@@ -346,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
346 spin_lock(&ei->i_block_reservation_lock); 343 spin_lock(&ei->i_block_reservation_lock);
347 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 344 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
348 if (unlikely(used > ei->i_reserved_data_blocks)) { 345 if (unlikely(used > ei->i_reserved_data_blocks)) {
349 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 346 ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
350 "with only %d reserved data blocks", 347 "with only %d reserved data blocks",
351 __func__, inode->i_ino, used, 348 __func__, inode->i_ino, used,
352 ei->i_reserved_data_blocks); 349 ei->i_reserved_data_blocks);
@@ -355,10 +352,12 @@ void ext4_da_update_reserve_space(struct inode *inode,
355 } 352 }
356 353
357 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { 354 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
358 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " 355 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
359 "with only %d reserved metadata blocks\n", __func__, 356 "with only %d reserved metadata blocks "
360 inode->i_ino, ei->i_allocated_meta_blocks, 357 "(releasing %d blocks with reserved %d data blocks)",
361 ei->i_reserved_meta_blocks); 358 inode->i_ino, ei->i_allocated_meta_blocks,
359 ei->i_reserved_meta_blocks, used,
360 ei->i_reserved_data_blocks);
362 WARN_ON(1); 361 WARN_ON(1);
363 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; 362 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
364 } 363 }
@@ -508,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
508int ext4_map_blocks(handle_t *handle, struct inode *inode, 507int ext4_map_blocks(handle_t *handle, struct inode *inode,
509 struct ext4_map_blocks *map, int flags) 508 struct ext4_map_blocks *map, int flags)
510{ 509{
510 struct extent_status es;
511 int retval; 511 int retval;
512 512
513 map->m_flags = 0; 513 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 515 "logical block %lu\n", inode->i_ino, flags, map->m_len,
516 (unsigned long) map->m_lblk); 516 (unsigned long) map->m_lblk);
517
518 /* Look up the extent status tree first */
519 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
520 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
521 map->m_pblk = ext4_es_pblock(&es) +
522 map->m_lblk - es.es_lblk;
523 map->m_flags |= ext4_es_is_written(&es) ?
524 EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
525 retval = es.es_len - (map->m_lblk - es.es_lblk);
526 if (retval > map->m_len)
527 retval = map->m_len;
528 map->m_len = retval;
529 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
530 retval = 0;
531 } else {
532 BUG_ON(1);
533 }
534 goto found;
535 }
536
517 /* 537 /*
518 * Try to see if we can get the block without requesting a new 538 * Try to see if we can get the block without requesting a new
519 * file system block. 539 * file system block.
@@ -527,20 +547,27 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
527 retval = ext4_ind_map_blocks(handle, inode, map, flags & 547 retval = ext4_ind_map_blocks(handle, inode, map, flags &
528 EXT4_GET_BLOCKS_KEEP_SIZE); 548 EXT4_GET_BLOCKS_KEEP_SIZE);
529 } 549 }
550 if (retval > 0) {
551 int ret;
552 unsigned long long status;
553
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
557 ext4_find_delalloc_range(inode, map->m_lblk,
558 map->m_lblk + map->m_len - 1))
559 status |= EXTENT_STATUS_DELAYED;
560 ret = ext4_es_insert_extent(inode, map->m_lblk,
561 map->m_len, map->m_pblk, status);
562 if (ret < 0)
563 retval = ret;
564 }
530 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 565 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
531 up_read((&EXT4_I(inode)->i_data_sem)); 566 up_read((&EXT4_I(inode)->i_data_sem));
532 567
568found:
533 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 569 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
534 int ret; 570 int ret = check_block_validity(inode, map);
535 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
536 /* delayed alloc may be allocated by fallocate and
537 * coverted to initialized by directIO.
538 * we need to handle delayed extent here.
539 */
540 down_write((&EXT4_I(inode)->i_data_sem));
541 goto delayed_mapped;
542 }
543 ret = check_block_validity(inode, map);
544 if (ret != 0) 571 if (ret != 0)
545 return ret; 572 return ret;
546 } 573 }
@@ -560,16 +587,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
560 return retval; 587 return retval;
561 588
562 /* 589 /*
563 * When we call get_blocks without the create flag, the 590 * Here we clear m_flags because after allocating a new extent,
564 * BH_Unwritten flag could have gotten set if the blocks 591 * it will be set again.
565 * requested were part of a uninitialized extent. We need to
566 * clear this flag now that we are committed to convert all or
567 * part of the uninitialized extent to be an initialized
568 * extent. This is because we need to avoid the combination
569 * of BH_Unwritten and BH_Mapped flags being simultaneously
570 * set on the buffer_head.
571 */ 592 */
572 map->m_flags &= ~EXT4_MAP_UNWRITTEN; 593 map->m_flags &= ~EXT4_MAP_FLAGS;
573 594
574 /* 595 /*
575 * New blocks allocate and/or writing to uninitialized extent 596 * New blocks allocate and/or writing to uninitialized extent
@@ -615,18 +636,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
615 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 636 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
616 ext4_da_update_reserve_space(inode, retval, 1); 637 ext4_da_update_reserve_space(inode, retval, 1);
617 } 638 }
618 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { 639 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
619 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 640 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
620 641
621 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 642 if (retval > 0) {
622 int ret; 643 int ret;
623delayed_mapped: 644 unsigned long long status;
624 /* delayed allocation blocks has been allocated */ 645
625 ret = ext4_es_remove_extent(inode, map->m_lblk, 646 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
626 map->m_len); 647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
627 if (ret < 0) 648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
628 retval = ret; 649 ext4_find_delalloc_range(inode, map->m_lblk,
629 } 650 map->m_lblk + map->m_len - 1))
651 status |= EXTENT_STATUS_DELAYED;
652 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
653 map->m_pblk, status);
654 if (ret < 0)
655 retval = ret;
630 } 656 }
631 657
632 up_write((&EXT4_I(inode)->i_data_sem)); 658 up_write((&EXT4_I(inode)->i_data_sem));
@@ -660,7 +686,8 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
660 if (map.m_len > DIO_MAX_BLOCKS) 686 if (map.m_len > DIO_MAX_BLOCKS)
661 map.m_len = DIO_MAX_BLOCKS; 687 map.m_len = DIO_MAX_BLOCKS;
662 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 688 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
663 handle = ext4_journal_start(inode, dio_credits); 689 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
690 dio_credits);
664 if (IS_ERR(handle)) { 691 if (IS_ERR(handle)) {
665 ret = PTR_ERR(handle); 692 ret = PTR_ERR(handle);
666 return ret; 693 return ret;
@@ -707,14 +734,16 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
707 /* ensure we send some value back into *errp */ 734 /* ensure we send some value back into *errp */
708 *errp = 0; 735 *errp = 0;
709 736
737 if (create && err == 0)
738 err = -ENOSPC; /* should never happen */
710 if (err < 0) 739 if (err < 0)
711 *errp = err; 740 *errp = err;
712 if (err <= 0) 741 if (err <= 0)
713 return NULL; 742 return NULL;
714 743
715 bh = sb_getblk(inode->i_sb, map.m_pblk); 744 bh = sb_getblk(inode->i_sb, map.m_pblk);
716 if (!bh) { 745 if (unlikely(!bh)) {
717 *errp = -EIO; 746 *errp = -ENOMEM;
718 return NULL; 747 return NULL;
719 } 748 }
720 if (map.m_flags & EXT4_MAP_NEW) { 749 if (map.m_flags & EXT4_MAP_NEW) {
@@ -808,11 +837,10 @@ int ext4_walk_page_buffers(handle_t *handle,
808 * and the commit_write(). So doing the jbd2_journal_start at the start of 837 * and the commit_write(). So doing the jbd2_journal_start at the start of
809 * prepare_write() is the right place. 838 * prepare_write() is the right place.
810 * 839 *
811 * Also, this function can nest inside ext4_writepage() -> 840 * Also, this function can nest inside ext4_writepage(). In that case, we
812 * block_write_full_page(). In that case, we *know* that ext4_writepage() 841 * *know* that ext4_writepage() has generated enough buffer credits to do the
813 * has generated enough buffer credits to do the whole page. So we won't 842 * whole page. So we won't block on the journal in that case, which is good,
814 * block on the journal in that case, which is good, because the caller may 843 * because the caller may be PF_MEMALLOC.
815 * be PF_MEMALLOC.
816 * 844 *
817 * By accident, ext4 can be reentered when a transaction is open via 845 * By accident, ext4 can be reentered when a transaction is open via
818 * quota file writes. If we were to commit the transaction while thus 846 * quota file writes. If we were to commit the transaction while thus
@@ -878,32 +906,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
878 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 906 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
879 flags, pagep); 907 flags, pagep);
880 if (ret < 0) 908 if (ret < 0)
881 goto out; 909 return ret;
882 if (ret == 1) { 910 if (ret == 1)
883 ret = 0; 911 return 0;
884 goto out;
885 }
886 } 912 }
887 913
888retry: 914 /*
889 handle = ext4_journal_start(inode, needed_blocks); 915 * grab_cache_page_write_begin() can take a long time if the
916 * system is thrashing due to memory pressure, or if the page
917 * is being written back. So grab it first before we start
918 * the transaction handle. This also allows us to allocate
919 * the page (if needed) without using GFP_NOFS.
920 */
921retry_grab:
922 page = grab_cache_page_write_begin(mapping, index, flags);
923 if (!page)
924 return -ENOMEM;
925 unlock_page(page);
926
927retry_journal:
928 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
890 if (IS_ERR(handle)) { 929 if (IS_ERR(handle)) {
891 ret = PTR_ERR(handle); 930 page_cache_release(page);
892 goto out; 931 return PTR_ERR(handle);
893 } 932 }
894 933
895 /* We cannot recurse into the filesystem as the transaction is already 934 lock_page(page);
896 * started */ 935 if (page->mapping != mapping) {
897 flags |= AOP_FLAG_NOFS; 936 /* The page got truncated from under us */
898 937 unlock_page(page);
899 page = grab_cache_page_write_begin(mapping, index, flags); 938 page_cache_release(page);
900 if (!page) {
901 ext4_journal_stop(handle); 939 ext4_journal_stop(handle);
902 ret = -ENOMEM; 940 goto retry_grab;
903 goto out;
904 } 941 }
905 942 wait_on_page_writeback(page);
906 *pagep = page;
907 943
908 if (ext4_should_dioread_nolock(inode)) 944 if (ext4_should_dioread_nolock(inode))
909 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 945 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -918,7 +954,6 @@ retry:
918 954
919 if (ret) { 955 if (ret) {
920 unlock_page(page); 956 unlock_page(page);
921 page_cache_release(page);
922 /* 957 /*
923 * __block_write_begin may have instantiated a few blocks 958 * __block_write_begin may have instantiated a few blocks
924 * outside i_size. Trim these off again. Don't need 959 * outside i_size. Trim these off again. Don't need
@@ -942,11 +977,14 @@ retry:
942 if (inode->i_nlink) 977 if (inode->i_nlink)
943 ext4_orphan_del(NULL, inode); 978 ext4_orphan_del(NULL, inode);
944 } 979 }
945 }
946 980
947 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 981 if (ret == -ENOSPC &&
948 goto retry; 982 ext4_should_retry_alloc(inode->i_sb, &retries))
949out: 983 goto retry_journal;
984 page_cache_release(page);
985 return ret;
986 }
987 *pagep = page;
950 return ret; 988 return ret;
951} 989}
952 990
@@ -1256,7 +1294,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1256 * function is called from invalidate page, it's 1294 * function is called from invalidate page, it's
1257 * harmless to return without any action. 1295 * harmless to return without any action.
1258 */ 1296 */
1259 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " 1297 ext4_warning(inode->i_sb, "ext4_da_release_space: "
1260 "ino %lu, to_free %d with only %d reserved " 1298 "ino %lu, to_free %d with only %d reserved "
1261 "data blocks", inode->i_ino, to_free, 1299 "data blocks", inode->i_ino, to_free,
1262 ei->i_reserved_data_blocks); 1300 ei->i_reserved_data_blocks);
@@ -1357,7 +1395,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1357 loff_t size = i_size_read(inode); 1395 loff_t size = i_size_read(inode);
1358 unsigned int len, block_start; 1396 unsigned int len, block_start;
1359 struct buffer_head *bh, *page_bufs = NULL; 1397 struct buffer_head *bh, *page_bufs = NULL;
1360 int journal_data = ext4_should_journal_data(inode);
1361 sector_t pblock = 0, cur_logical = 0; 1398 sector_t pblock = 0, cur_logical = 0;
1362 struct ext4_io_submit io_submit; 1399 struct ext4_io_submit io_submit;
1363 1400
@@ -1378,7 +1415,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1378 if (nr_pages == 0) 1415 if (nr_pages == 0)
1379 break; 1416 break;
1380 for (i = 0; i < nr_pages; i++) { 1417 for (i = 0; i < nr_pages; i++) {
1381 int commit_write = 0, skip_page = 0; 1418 int skip_page = 0;
1382 struct page *page = pvec.pages[i]; 1419 struct page *page = pvec.pages[i];
1383 1420
1384 index = page->index; 1421 index = page->index;
@@ -1400,27 +1437,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1400 BUG_ON(!PageLocked(page)); 1437 BUG_ON(!PageLocked(page));
1401 BUG_ON(PageWriteback(page)); 1438 BUG_ON(PageWriteback(page));
1402 1439
1403 /*
1404 * If the page does not have buffers (for
1405 * whatever reason), try to create them using
1406 * __block_write_begin. If this fails,
1407 * skip the page and move on.
1408 */
1409 if (!page_has_buffers(page)) {
1410 if (__block_write_begin(page, 0, len,
1411 noalloc_get_block_write)) {
1412 skip_page:
1413 unlock_page(page);
1414 continue;
1415 }
1416 commit_write = 1;
1417 }
1418
1419 bh = page_bufs = page_buffers(page); 1440 bh = page_bufs = page_buffers(page);
1420 block_start = 0; 1441 block_start = 0;
1421 do { 1442 do {
1422 if (!bh)
1423 goto skip_page;
1424 if (map && (cur_logical >= map->m_lblk) && 1443 if (map && (cur_logical >= map->m_lblk) &&
1425 (cur_logical <= (map->m_lblk + 1444 (cur_logical <= (map->m_lblk +
1426 (map->m_len - 1)))) { 1445 (map->m_len - 1)))) {
@@ -1448,33 +1467,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1448 pblock++; 1467 pblock++;
1449 } while (bh != page_bufs); 1468 } while (bh != page_bufs);
1450 1469
1451 if (skip_page) 1470 if (skip_page) {
1452 goto skip_page; 1471 unlock_page(page);
1453 1472 continue;
1454 if (commit_write) 1473 }
1455 /* mark the buffer_heads as dirty & uptodate */
1456 block_commit_write(page, 0, len);
1457 1474
1458 clear_page_dirty_for_io(page); 1475 clear_page_dirty_for_io(page);
1459 /* 1476 err = ext4_bio_write_page(&io_submit, page, len,
1460 * Delalloc doesn't support data journalling, 1477 mpd->wbc);
1461 * but eventually maybe we'll lift this
1462 * restriction.
1463 */
1464 if (unlikely(journal_data && PageChecked(page)))
1465 err = __ext4_journalled_writepage(page, len);
1466 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
1467 err = ext4_bio_write_page(&io_submit, page,
1468 len, mpd->wbc);
1469 else if (buffer_uninit(page_bufs)) {
1470 ext4_set_bh_endio(page_bufs, inode);
1471 err = block_write_full_page_endio(page,
1472 noalloc_get_block_write,
1473 mpd->wbc, ext4_end_io_buffer_write);
1474 } else
1475 err = block_write_full_page(page,
1476 noalloc_get_block_write, mpd->wbc);
1477
1478 if (!err) 1478 if (!err)
1479 mpd->pages_written++; 1479 mpd->pages_written++;
1480 /* 1480 /*
@@ -1640,7 +1640,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1640 (unsigned long long) next, 1640 (unsigned long long) next,
1641 mpd->b_size >> mpd->inode->i_blkbits, err); 1641 mpd->b_size >> mpd->inode->i_blkbits, err);
1642 ext4_msg(sb, KERN_CRIT, 1642 ext4_msg(sb, KERN_CRIT,
1643 "This should not happen!! Data will be lost\n"); 1643 "This should not happen!! Data will be lost");
1644 if (err == -ENOSPC) 1644 if (err == -ENOSPC)
1645 ext4_print_free_blocks(mpd->inode); 1645 ext4_print_free_blocks(mpd->inode);
1646 } 1646 }
@@ -1690,16 +1690,16 @@ submit_io:
1690 * 1690 *
1691 * @mpd->lbh - extent of blocks 1691 * @mpd->lbh - extent of blocks
1692 * @logical - logical number of the block in the file 1692 * @logical - logical number of the block in the file
1693 * @bh - bh of the block (used to access block's state) 1693 * @b_state - b_state of the buffer head added
1694 * 1694 *
1695 * the function is used to collect contig. blocks in same state 1695 * the function is used to collect contig. blocks in same state
1696 */ 1696 */
1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
1698 sector_t logical, size_t b_size,
1699 unsigned long b_state) 1698 unsigned long b_state)
1700{ 1699{
1701 sector_t next; 1700 sector_t next;
1702 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; 1701 int blkbits = mpd->inode->i_blkbits;
1702 int nrblocks = mpd->b_size >> blkbits;
1703 1703
1704 /* 1704 /*
1705 * XXX Don't go larger than mballoc is willing to allocate 1705 * XXX Don't go larger than mballoc is willing to allocate
@@ -1707,11 +1707,11 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1707 * mpage_da_submit_io() into this function and then call 1707 * mpage_da_submit_io() into this function and then call
1708 * ext4_map_blocks() multiple times in a loop 1708 * ext4_map_blocks() multiple times in a loop
1709 */ 1709 */
1710 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) 1710 if (nrblocks >= (8*1024*1024 >> blkbits))
1711 goto flush_it; 1711 goto flush_it;
1712 1712
1713 /* check if thereserved journal credits might overflow */ 1713 /* check if the reserved journal credits might overflow */
1714 if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { 1714 if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) { 1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1716 /* 1716 /*
1717 * With non-extent format we are limited by the journal 1717 * With non-extent format we are limited by the journal
@@ -1720,16 +1720,6 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1720 * nrblocks. So limit nrblocks. 1720 * nrblocks. So limit nrblocks.
1721 */ 1721 */
1722 goto flush_it; 1722 goto flush_it;
1723 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1724 EXT4_MAX_TRANS_DATA) {
1725 /*
1726 * Adding the new buffer_head would make it cross the
1727 * allowed limit for which we have journal credit
1728 * reserved. So limit the new bh->b_size
1729 */
1730 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1731 mpd->inode->i_blkbits;
1732 /* we will do mpage_da_submit_io in the next loop */
1733 } 1723 }
1734 } 1724 }
1735 /* 1725 /*
@@ -1737,7 +1727,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1737 */ 1727 */
1738 if (mpd->b_size == 0) { 1728 if (mpd->b_size == 0) {
1739 mpd->b_blocknr = logical; 1729 mpd->b_blocknr = logical;
1740 mpd->b_size = b_size; 1730 mpd->b_size = 1 << blkbits;
1741 mpd->b_state = b_state & BH_FLAGS; 1731 mpd->b_state = b_state & BH_FLAGS;
1742 return; 1732 return;
1743 } 1733 }
@@ -1747,7 +1737,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1747 * Can we merge the block to our big extent? 1737 * Can we merge the block to our big extent?
1748 */ 1738 */
1749 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { 1739 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
1750 mpd->b_size += b_size; 1740 mpd->b_size += 1 << blkbits;
1751 return; 1741 return;
1752 } 1742 }
1753 1743
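mpage_add_bh_to_extent() above now grows the pending extent one block (1 << blkbits) at a time, merging a buffer only when it is logically contiguous and its BH state flags match, and flushing otherwise. A small userspace sketch of that accumulate-or-flush logic; the struct and flush_extent() are made up for the example, not ext4 code:

#include <stdio.h>

struct extent_acc {
        unsigned long start;    /* first logical block in the extent */
        unsigned long nblocks;  /* number of blocks accumulated so far */
        unsigned int state;     /* BH-style state flags shared by the extent */
};

static void flush_extent(struct extent_acc *acc)
{
        if (acc->nblocks)
                printf("submit extent: start %lu, len %lu, state %#x\n",
                       acc->start, acc->nblocks, acc->state);
        acc->nblocks = 0;
}

static void add_block(struct extent_acc *acc, unsigned long logical,
                      unsigned int state)
{
        if (acc->nblocks == 0) {                /* first block of a new extent */
                acc->start = logical;
                acc->nblocks = 1;
                acc->state = state;
                return;
        }
        if (logical == acc->start + acc->nblocks && state == acc->state) {
                acc->nblocks++;                 /* contiguous and same state: merge */
                return;
        }
        flush_extent(acc);                      /* discontiguous: flush, then restart */
        add_block(acc, logical, state);
}

int main(void)
{
        struct extent_acc acc = { 0, 0, 0 };
        unsigned long blocks[] = { 10, 11, 12, 20, 21 };

        for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
                add_block(&acc, blocks[i], 0x1);
        flush_extent(&acc);
        return 0;
}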
@@ -1775,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1775 struct ext4_map_blocks *map, 1765 struct ext4_map_blocks *map,
1776 struct buffer_head *bh) 1766 struct buffer_head *bh)
1777{ 1767{
1768 struct extent_status es;
1778 int retval; 1769 int retval;
1779 sector_t invalid_block = ~((sector_t) 0xffff); 1770 sector_t invalid_block = ~((sector_t) 0xffff);
1780 1771
@@ -1785,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1785 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," 1776 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1786 "logical block %lu\n", inode->i_ino, map->m_len, 1777 "logical block %lu\n", inode->i_ino, map->m_len,
1787 (unsigned long) map->m_lblk); 1778 (unsigned long) map->m_lblk);
1779
1780 /* Lookup extent status tree firstly */
1781 if (ext4_es_lookup_extent(inode, iblock, &es)) {
1782
1783 if (ext4_es_is_hole(&es)) {
1784 retval = 0;
1785 down_read((&EXT4_I(inode)->i_data_sem));
1786 goto add_delayed;
1787 }
1788
1789 /*
1790 * Delayed extent could be allocated by fallocate.
1791 * So we need to check it.
1792 */
1793 if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
1794 map_bh(bh, inode->i_sb, invalid_block);
1795 set_buffer_new(bh);
1796 set_buffer_delay(bh);
1797 return 0;
1798 }
1799
1800 map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
1801 retval = es.es_len - (iblock - es.es_lblk);
1802 if (retval > map->m_len)
1803 retval = map->m_len;
1804 map->m_len = retval;
1805 if (ext4_es_is_written(&es))
1806 map->m_flags |= EXT4_MAP_MAPPED;
1807 else if (ext4_es_is_unwritten(&es))
1808 map->m_flags |= EXT4_MAP_UNWRITTEN;
1809 else
1810 BUG_ON(1);
1811
1812 return retval;
1813 }
1814
1788 /* 1815 /*
1789 * Try to see if we can get the block without requesting a new 1816 * Try to see if we can get the block without requesting a new
1790 * file system block. 1817 * file system block.
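The new fast path above consults the extent status tree before any block lookup: for a cached extent that is neither a hole nor purely delayed, the physical block is es_pblock + (iblock - es_lblk) and the returned length is clamped to both the cached extent and the requested map->m_len. A hedged userspace sketch of just that mapping arithmetic; struct cached_extent is a stand-in, not the real extent_status:

#include <stdio.h>

struct cached_extent {
        unsigned long lblk;     /* first logical block covered */
        unsigned long len;      /* number of blocks covered */
        unsigned long pblk;     /* first physical block */
};

/* Map iblock..iblock+max_len-1 through one cached extent.
 * Returns how many blocks were mapped and writes the physical start. */
static unsigned long map_through_extent(const struct cached_extent *es,
                                        unsigned long iblock,
                                        unsigned long max_len,
                                        unsigned long *pblk_out)
{
        unsigned long avail;

        if (iblock < es->lblk || iblock >= es->lblk + es->len)
                return 0;                       /* cache miss */
        *pblk_out = es->pblk + (iblock - es->lblk);
        avail = es->len - (iblock - es->lblk);
        return avail < max_len ? avail : max_len;
}

int main(void)
{
        struct cached_extent es = { .lblk = 100, .len = 8, .pblk = 5000 };
        unsigned long pblk, n;

        n = map_through_extent(&es, 103, 16, &pblk);
        printf("mapped %lu blocks starting at pblk %lu\n", n, pblk);
        return 0;
}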
@@ -1803,11 +1830,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1803 map->m_flags |= EXT4_MAP_FROM_CLUSTER; 1830 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
1804 retval = 0; 1831 retval = 0;
1805 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1832 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1806 retval = ext4_ext_map_blocks(NULL, inode, map, 0); 1833 retval = ext4_ext_map_blocks(NULL, inode, map,
1834 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1807 else 1835 else
1808 retval = ext4_ind_map_blocks(NULL, inode, map, 0); 1836 retval = ext4_ind_map_blocks(NULL, inode, map,
1837 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1809 1838
1839add_delayed:
1810 if (retval == 0) { 1840 if (retval == 0) {
1841 int ret;
1811 /* 1842 /*
1812 * XXX: __block_prepare_write() unmaps passed block, 1843 * XXX: __block_prepare_write() unmaps passed block,
1813 * is it OK? 1844 * is it OK?
@@ -1815,15 +1846,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1815 /* If the block was allocated from previously allocated cluster, 1846 /* If the block was allocated from previously allocated cluster,
1816 * then we dont need to reserve it again. */ 1847 * then we dont need to reserve it again. */
1817 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1818 retval = ext4_da_reserve_space(inode, iblock); 1849 ret = ext4_da_reserve_space(inode, iblock);
1819 if (retval) 1850 if (ret) {
1820 /* not enough space to reserve */ 1851 /* not enough space to reserve */
1852 retval = ret;
1821 goto out_unlock; 1853 goto out_unlock;
1854 }
1822 } 1855 }
1823 1856
1824 retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); 1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1825 if (retval) 1858 ~0, EXTENT_STATUS_DELAYED);
1859 if (ret) {
1860 retval = ret;
1826 goto out_unlock; 1861 goto out_unlock;
1862 }
1827 1863
1828 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served 1864 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1829 * and it should not appear on the bh->b_state. 1865 * and it should not appear on the bh->b_state.
@@ -1833,6 +1869,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1833 map_bh(bh, inode->i_sb, invalid_block); 1869 map_bh(bh, inode->i_sb, invalid_block);
1834 set_buffer_new(bh); 1870 set_buffer_new(bh);
1835 set_buffer_delay(bh); 1871 set_buffer_delay(bh);
1872 } else if (retval > 0) {
1873 int ret;
1874 unsigned long long status;
1875
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1879 map->m_pblk, status);
1880 if (ret != 0)
1881 retval = ret;
1836 } 1882 }
1837 1883
1838out_unlock: 1884out_unlock:
@@ -1890,27 +1936,6 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1890 return 0; 1936 return 0;
1891} 1937}
1892 1938
1893/*
1894 * This function is used as a standard get_block_t calback function
1895 * when there is no desire to allocate any blocks. It is used as a
1896 * callback function for block_write_begin() and block_write_full_page().
1897 * These functions should only try to map a single block at a time.
1898 *
1899 * Since this function doesn't do block allocations even if the caller
1900 * requests it by passing in create=1, it is critically important that
1901 * any caller checks to make sure that any buffer heads are returned
1902 * by this function are either all already mapped or marked for
1903 * delayed allocation before calling block_write_full_page(). Otherwise,
1904 * b_blocknr could be left unitialized, and the page write functions will
1905 * be taken by surprise.
1906 */
1907static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
1908 struct buffer_head *bh_result, int create)
1909{
1910 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
1911 return _ext4_get_block(inode, iblock, bh_result, 0);
1912}
1913
1914static int bget_one(handle_t *handle, struct buffer_head *bh) 1939static int bget_one(handle_t *handle, struct buffer_head *bh)
1915{ 1940{
1916 get_bh(bh); 1941 get_bh(bh);
@@ -1955,7 +1980,8 @@ static int __ext4_journalled_writepage(struct page *page,
1955 * references to buffers so we are safe */ 1980 * references to buffers so we are safe */
1956 unlock_page(page); 1981 unlock_page(page);
1957 1982
1958 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 1983 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
1984 ext4_writepage_trans_blocks(inode));
1959 if (IS_ERR(handle)) { 1985 if (IS_ERR(handle)) {
1960 ret = PTR_ERR(handle); 1986 ret = PTR_ERR(handle);
1961 goto out; 1987 goto out;
@@ -2035,11 +2061,12 @@ out:
2035static int ext4_writepage(struct page *page, 2061static int ext4_writepage(struct page *page,
2036 struct writeback_control *wbc) 2062 struct writeback_control *wbc)
2037{ 2063{
2038 int ret = 0, commit_write = 0; 2064 int ret = 0;
2039 loff_t size; 2065 loff_t size;
2040 unsigned int len; 2066 unsigned int len;
2041 struct buffer_head *page_bufs = NULL; 2067 struct buffer_head *page_bufs = NULL;
2042 struct inode *inode = page->mapping->host; 2068 struct inode *inode = page->mapping->host;
2069 struct ext4_io_submit io_submit;
2043 2070
2044 trace_ext4_writepage(page); 2071 trace_ext4_writepage(page);
2045 size = i_size_read(inode); 2072 size = i_size_read(inode);
@@ -2048,39 +2075,29 @@ static int ext4_writepage(struct page *page,
2048 else 2075 else
2049 len = PAGE_CACHE_SIZE; 2076 len = PAGE_CACHE_SIZE;
2050 2077
2078 page_bufs = page_buffers(page);
2051 /* 2079 /*
2052 * If the page does not have buffers (for whatever reason), 2080 * We cannot do block allocation or other extent handling in this
2053 * try to create them using __block_write_begin. If this 2081 * function. If there are buffers needing that, we have to redirty
2054 * fails, redirty the page and move on. 2082 * the page. But we may reach here when we do a journal commit via
2083 * journal_submit_inode_data_buffers() and in that case we must write
2084 * allocated buffers to achieve data=ordered mode guarantees.
2055 */ 2085 */
2056 if (!page_has_buffers(page)) { 2086 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2057 if (__block_write_begin(page, 0, len, 2087 ext4_bh_delay_or_unwritten)) {
2058 noalloc_get_block_write)) { 2088 redirty_page_for_writepage(wbc, page);
2059 redirty_page: 2089 if (current->flags & PF_MEMALLOC) {
2060 redirty_page_for_writepage(wbc, page); 2090 /*
2091 * For memory cleaning there's no point in writing only
2092 * some buffers. So just bail out. Warn if we came here
2093 * from direct reclaim.
2094 */
2095 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
2096 == PF_MEMALLOC);
2061 unlock_page(page); 2097 unlock_page(page);
2062 return 0; 2098 return 0;
2063 } 2099 }
2064 commit_write = 1;
2065 }
2066 page_bufs = page_buffers(page);
2067 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2068 ext4_bh_delay_or_unwritten)) {
2069 /*
2070 * We don't want to do block allocation, so redirty
2071 * the page and return. We may reach here when we do
2072 * a journal commit via journal_submit_inode_data_buffers.
2073 * We can also reach here via shrink_page_list but it
2074 * should never be for direct reclaim so warn if that
2075 * happens
2076 */
2077 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
2078 PF_MEMALLOC);
2079 goto redirty_page;
2080 } 2100 }
2081 if (commit_write)
2082 /* now mark the buffer_heads as dirty and uptodate */
2083 block_commit_write(page, 0, len);
2084 2101
2085 if (PageChecked(page) && ext4_should_journal_data(inode)) 2102 if (PageChecked(page) && ext4_should_journal_data(inode))
2086 /* 2103 /*
@@ -2089,14 +2106,9 @@ static int ext4_writepage(struct page *page,
2089 */ 2106 */
2090 return __ext4_journalled_writepage(page, len); 2107 return __ext4_journalled_writepage(page, len);
2091 2108
2092 if (buffer_uninit(page_bufs)) { 2109 memset(&io_submit, 0, sizeof(io_submit));
2093 ext4_set_bh_endio(page_bufs, inode); 2110 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2094 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2111 ext4_io_submit(&io_submit);
2095 wbc, ext4_end_io_buffer_write);
2096 } else
2097 ret = block_write_full_page(page, noalloc_get_block_write,
2098 wbc);
2099
2100 return ret; 2112 return ret;
2101} 2113}
2102 2114
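After this change ext4_writepage() assumes the page already has buffers and never allocates from the writeback path: if any buffer is delayed or unwritten the page is redirtied (and memory-cleaning callers bail out, warning if they came from direct reclaim), otherwise the whole page is handed to ext4_bio_write_page(). A userspace sketch of that walk-and-decide step; the types and helper below are illustrative only:

#include <stdio.h>
#include <stdbool.h>

struct fake_bh {
        bool delayed;           /* block not allocated yet */
        bool unwritten;         /* allocated but still unwritten extent */
};

/* Mirror of the "can we write this page without allocating?" check. */
static bool page_needs_allocation(const struct fake_bh *bhs, int nr)
{
        for (int i = 0; i < nr; i++)
                if (bhs[i].delayed || bhs[i].unwritten)
                        return true;
        return false;
}

int main(void)
{
        struct fake_bh page_bufs[4] = { {0, 0}, {0, 0}, {1, 0}, {0, 0} };

        if (page_needs_allocation(page_bufs, 4))
                printf("redirty page and let delalloc writeback handle it\n");
        else
                printf("submit page for I/O\n");
        return 0;
}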
@@ -2228,51 +2240,38 @@ static int write_cache_pages_da(handle_t *handle,
2228 logical = (sector_t) page->index << 2240 logical = (sector_t) page->index <<
2229 (PAGE_CACHE_SHIFT - inode->i_blkbits); 2241 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2230 2242
2231 if (!page_has_buffers(page)) { 2243 /* Add all dirty buffers to mpd */
2232 mpage_add_bh_to_extent(mpd, logical, 2244 head = page_buffers(page);
2233 PAGE_CACHE_SIZE, 2245 bh = head;
2234 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2246 do {
2235 if (mpd->io_done) 2247 BUG_ON(buffer_locked(bh));
2236 goto ret_extent_tail;
2237 } else {
2238 /* 2248 /*
2239 * Page with regular buffer heads, 2249 * We need to try to allocate unmapped blocks
2240 * just add all dirty ones 2250 * in the same page. Otherwise we won't make
2251 * progress with the page in ext4_writepage
2241 */ 2252 */
2242 head = page_buffers(page); 2253 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2243 bh = head; 2254 mpage_add_bh_to_extent(mpd, logical,
2244 do { 2255 bh->b_state);
2245 BUG_ON(buffer_locked(bh)); 2256 if (mpd->io_done)
2257 goto ret_extent_tail;
2258 } else if (buffer_dirty(bh) &&
2259 buffer_mapped(bh)) {
2246 /* 2260 /*
2247 * We need to try to allocate 2261 * mapped dirty buffer. We need to
2248 * unmapped blocks in the same page. 2262 * update the b_state because we look
2249 * Otherwise we won't make progress 2263 * at b_state in mpage_da_map_blocks.
2250 * with the page in ext4_writepage 2264 * We don't update b_size because if we
2265 * find an unmapped buffer_head later
2266 * we need to use the b_state flag of
2267 * that buffer_head.
2251 */ 2268 */
2252 if (ext4_bh_delay_or_unwritten(NULL, bh)) { 2269 if (mpd->b_size == 0)
2253 mpage_add_bh_to_extent(mpd, logical, 2270 mpd->b_state =
2254 bh->b_size, 2271 bh->b_state & BH_FLAGS;
2255 bh->b_state); 2272 }
2256 if (mpd->io_done) 2273 logical++;
2257 goto ret_extent_tail; 2274 } while ((bh = bh->b_this_page) != head);
2258 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2259 /*
2260 * mapped dirty buffer. We need
2261 * to update the b_state
2262 * because we look at b_state
2263 * in mpage_da_map_blocks. We
2264 * don't update b_size because
2265 * if we find an unmapped
2266 * buffer_head later we need to
2267 * use the b_state flag of that
2268 * buffer_head.
2269 */
2270 if (mpd->b_size == 0)
2271 mpd->b_state = bh->b_state & BH_FLAGS;
2272 }
2273 logical++;
2274 } while ((bh = bh->b_this_page) != head);
2275 }
2276 2275
2277 if (nr_to_write > 0) { 2276 if (nr_to_write > 0) {
2278 nr_to_write--; 2277 nr_to_write--;
@@ -2413,7 +2412,8 @@ retry:
2413 needed_blocks = ext4_da_writepages_trans_blocks(inode); 2412 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2414 2413
2415 /* start a new transaction*/ 2414 /* start a new transaction*/
2416 handle = ext4_journal_start(inode, needed_blocks); 2415 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2416 needed_blocks);
2417 if (IS_ERR(handle)) { 2417 if (IS_ERR(handle)) {
2418 ret = PTR_ERR(handle); 2418 ret = PTR_ERR(handle);
2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2512 /* 2512 /*
2513 * Start pushing delalloc when 1/2 of free blocks are dirty. 2513 * Start pushing delalloc when 1/2 of free blocks are dirty.
2514 */ 2514 */
2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && 2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
2516 !writeback_in_progress(sb->s_bdi) && 2516 try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2517 down_read_trylock(&sb->s_umount)) {
2518 writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2519 up_read(&sb->s_umount);
2520 }
2521 2517
2522 if (2 * free_blocks < 3 * dirty_blocks || 2518 if (2 * free_blocks < 3 * dirty_blocks ||
2523 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { 2519 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
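ext4_nonda_switch() above keeps two thresholds: once dirty delalloc blocks exceed half of the free blocks it now simply calls try_to_writeback_inodes_sb(), which handles the s_umount trylock itself, and once free space falls near the watermark the write path switches to non-delayed allocation. A tiny sketch of the two ratio checks; the watermark constant below is invented, not the ext4 value:

#include <stdio.h>
#include <stdbool.h>

#define FREECLUSTERS_WATERMARK 1024     /* illustrative only */

static bool should_kick_writeback(long free_blocks, long dirty_blocks)
{
        return dirty_blocks && free_blocks < 2 * dirty_blocks;
}

static bool should_switch_to_nondelalloc(long free_blocks, long dirty_blocks)
{
        return 2 * free_blocks < 3 * dirty_blocks ||
               free_blocks < dirty_blocks + FREECLUSTERS_WATERMARK;
}

int main(void)
{
        long free_blocks = 3000, dirty_blocks = 1800;

        printf("kick writeback: %d\n",
               should_kick_writeback(free_blocks, dirty_blocks));
        printf("use non-delalloc path: %d\n",
               should_switch_to_nondelalloc(free_blocks, dirty_blocks));
        return 0;
}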
@@ -2555,42 +2551,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2555 pos, len, flags, 2551 pos, len, flags,
2556 pagep, fsdata); 2552 pagep, fsdata);
2557 if (ret < 0) 2553 if (ret < 0)
2558 goto out; 2554 return ret;
2559 if (ret == 1) { 2555 if (ret == 1)
2560 ret = 0; 2556 return 0;
2561 goto out;
2562 }
2563 } 2557 }
2564 2558
2565retry: 2559 /*
2560 * grab_cache_page_write_begin() can take a long time if the
2561 * system is thrashing due to memory pressure, or if the page
2562 * is being written back. So grab it first before we start
2563 * the transaction handle. This also allows us to allocate
2564 * the page (if needed) without using GFP_NOFS.
2565 */
2566retry_grab:
2567 page = grab_cache_page_write_begin(mapping, index, flags);
2568 if (!page)
2569 return -ENOMEM;
2570 unlock_page(page);
2571
2566 /* 2572 /*
2567 * With delayed allocation, we don't log the i_disksize update 2573 * With delayed allocation, we don't log the i_disksize update
2568 * if there is delayed block allocation. But we still need 2574 * if there is delayed block allocation. But we still need
2569 * to journalling the i_disksize update if writes to the end 2575 * to journalling the i_disksize update if writes to the end
2570 * of file which has an already mapped buffer. 2576 * of file which has an already mapped buffer.
2571 */ 2577 */
2572 handle = ext4_journal_start(inode, 1); 2578retry_journal:
2579 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
2573 if (IS_ERR(handle)) { 2580 if (IS_ERR(handle)) {
2574 ret = PTR_ERR(handle); 2581 page_cache_release(page);
2575 goto out; 2582 return PTR_ERR(handle);
2576 } 2583 }
2577 /* We cannot recurse into the filesystem as the transaction is already
2578 * started */
2579 flags |= AOP_FLAG_NOFS;
2580 2584
2581 page = grab_cache_page_write_begin(mapping, index, flags); 2585 lock_page(page);
2582 if (!page) { 2586 if (page->mapping != mapping) {
2587 /* The page got truncated from under us */
2588 unlock_page(page);
2589 page_cache_release(page);
2583 ext4_journal_stop(handle); 2590 ext4_journal_stop(handle);
2584 ret = -ENOMEM; 2591 goto retry_grab;
2585 goto out;
2586 } 2592 }
2587 *pagep = page; 2593 /* In case writeback began while the page was unlocked */
2594 wait_on_page_writeback(page);
2588 2595
2589 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2596 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2590 if (ret < 0) { 2597 if (ret < 0) {
2591 unlock_page(page); 2598 unlock_page(page);
2592 ext4_journal_stop(handle); 2599 ext4_journal_stop(handle);
2593 page_cache_release(page);
2594 /* 2600 /*
2595 * block_write_begin may have instantiated a few blocks 2601 * block_write_begin may have instantiated a few blocks
2596 * outside i_size. Trim these off again. Don't need 2602 * outside i_size. Trim these off again. Don't need
@@ -2598,11 +2604,16 @@ retry:
2598 */ 2604 */
2599 if (pos + len > inode->i_size) 2605 if (pos + len > inode->i_size)
2600 ext4_truncate_failed_write(inode); 2606 ext4_truncate_failed_write(inode);
2607
2608 if (ret == -ENOSPC &&
2609 ext4_should_retry_alloc(inode->i_sb, &retries))
2610 goto retry_journal;
2611
2612 page_cache_release(page);
2613 return ret;
2601 } 2614 }
2602 2615
2603 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2616 *pagep = page;
2604 goto retry;
2605out:
2606 return ret; 2617 return ret;
2607} 2618}
2608 2619
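The rewritten ext4_da_write_begin() above (like ext4_write_begin() earlier) grabs and immediately unlocks the page before starting the journal handle, then re-locks it, re-checks page->mapping, and jumps back to retry_grab if the page was truncated in the meantime. A hedged userspace analogue of that grab/re-validate/retry shape, using a generation counter in place of page->mapping; none of the names below are kernel APIs (build with -pthread):

#include <stdio.h>
#include <pthread.h>

struct fake_page {
        pthread_mutex_t lock;
        int mapping_gen;        /* bumped whenever the "page" is truncated */
};

static struct fake_page pg = { PTHREAD_MUTEX_INITIALIZER, 0 };
static pthread_mutex_t journal = PTHREAD_MUTEX_INITIALIZER;

static void write_begin(void)
{
        int gen;

retry_grab:
        pthread_mutex_lock(&pg.lock);
        gen = pg.mapping_gen;           /* remember which "mapping" we saw */
        pthread_mutex_unlock(&pg.lock);

        /* Start the "transaction" only after the page work that may block. */
        pthread_mutex_lock(&journal);

        pthread_mutex_lock(&pg.lock);
        if (pg.mapping_gen != gen) {
                /* The page got truncated from under us: drop both and retry. */
                pthread_mutex_unlock(&pg.lock);
                pthread_mutex_unlock(&journal);
                goto retry_grab;
        }
        printf("page is still valid, do the write\n");
        pthread_mutex_unlock(&pg.lock);
        pthread_mutex_unlock(&journal);
}

int main(void)
{
        write_begin();
        return 0;
}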
@@ -2858,36 +2869,10 @@ ext4_readpages(struct file *file, struct address_space *mapping,
2858 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2869 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2859} 2870}
2860 2871
2861static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
2862{
2863 struct buffer_head *head, *bh;
2864 unsigned int curr_off = 0;
2865
2866 if (!page_has_buffers(page))
2867 return;
2868 head = bh = page_buffers(page);
2869 do {
2870 if (offset <= curr_off && test_clear_buffer_uninit(bh)
2871 && bh->b_private) {
2872 ext4_free_io_end(bh->b_private);
2873 bh->b_private = NULL;
2874 bh->b_end_io = NULL;
2875 }
2876 curr_off = curr_off + bh->b_size;
2877 bh = bh->b_this_page;
2878 } while (bh != head);
2879}
2880
2881static void ext4_invalidatepage(struct page *page, unsigned long offset) 2872static void ext4_invalidatepage(struct page *page, unsigned long offset)
2882{ 2873{
2883 trace_ext4_invalidatepage(page, offset); 2874 trace_ext4_invalidatepage(page, offset);
2884 2875
2885 /*
2886 * free any io_end structure allocated for buffers to be discarded
2887 */
2888 if (ext4_should_dioread_nolock(page->mapping->host))
2889 ext4_invalidatepage_free_endio(page, offset);
2890
2891 /* No journalling happens on data buffers when this function is used */ 2876 /* No journalling happens on data buffers when this function is used */
2892 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); 2877 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
2893 2878
@@ -2959,7 +2944,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2959 ssize_t size, void *private, int ret, 2944 ssize_t size, void *private, int ret,
2960 bool is_async) 2945 bool is_async)
2961{ 2946{
2962 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2947 struct inode *inode = file_inode(iocb->ki_filp);
2963 ext4_io_end_t *io_end = iocb->private; 2948 ext4_io_end_t *io_end = iocb->private;
2964 2949
2965 /* if not async direct IO or dio with 0 bytes write, just return */ 2950 /* if not async direct IO or dio with 0 bytes write, just return */
@@ -2977,9 +2962,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2977 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2962 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2978 ext4_free_io_end(io_end); 2963 ext4_free_io_end(io_end);
2979out: 2964out:
2965 inode_dio_done(inode);
2980 if (is_async) 2966 if (is_async)
2981 aio_complete(iocb, ret, 0); 2967 aio_complete(iocb, ret, 0);
2982 inode_dio_done(inode);
2983 return; 2968 return;
2984 } 2969 }
2985 2970
@@ -2993,65 +2978,6 @@ out:
2993 ext4_add_complete_io(io_end); 2978 ext4_add_complete_io(io_end);
2994} 2979}
2995 2980
2996static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2997{
2998 ext4_io_end_t *io_end = bh->b_private;
2999 struct inode *inode;
3000
3001 if (!test_clear_buffer_uninit(bh) || !io_end)
3002 goto out;
3003
3004 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3005 ext4_msg(io_end->inode->i_sb, KERN_INFO,
3006 "sb umounted, discard end_io request for inode %lu",
3007 io_end->inode->i_ino);
3008 ext4_free_io_end(io_end);
3009 goto out;
3010 }
3011
3012 /*
3013 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
3014 * but being more careful is always safe for the future change.
3015 */
3016 inode = io_end->inode;
3017 ext4_set_io_unwritten_flag(inode, io_end);
3018 ext4_add_complete_io(io_end);
3019out:
3020 bh->b_private = NULL;
3021 bh->b_end_io = NULL;
3022 clear_buffer_uninit(bh);
3023 end_buffer_async_write(bh, uptodate);
3024}
3025
3026static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3027{
3028 ext4_io_end_t *io_end;
3029 struct page *page = bh->b_page;
3030 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3031 size_t size = bh->b_size;
3032
3033retry:
3034 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3035 if (!io_end) {
3036 pr_warn_ratelimited("%s: allocation fail\n", __func__);
3037 schedule();
3038 goto retry;
3039 }
3040 io_end->offset = offset;
3041 io_end->size = size;
3042 /*
3043 * We need to hold a reference to the page to make sure it
3044 * doesn't get evicted before ext4_end_io_work() has a chance
3045 * to convert the extent from written to unwritten.
3046 */
3047 io_end->page = page;
3048 get_page(io_end->page);
3049
3050 bh->b_private = io_end;
3051 bh->b_end_io = ext4_end_io_buffer_write;
3052 return 0;
3053}
3054
3055/* 2981/*
3056 * For ext4 extent files, ext4 will do direct-io write to holes, 2982 * For ext4 extent files, ext4 will do direct-io write to holes,
3057 * preallocated extents, and those write extend the file, no need to 2983 * preallocated extents, and those write extend the file, no need to
@@ -3553,20 +3479,20 @@ int ext4_can_truncate(struct inode *inode)
3553 3479
3554int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3480int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3555{ 3481{
3556 struct inode *inode = file->f_path.dentry->d_inode; 3482 struct inode *inode = file_inode(file);
3557 if (!S_ISREG(inode->i_mode)) 3483 if (!S_ISREG(inode->i_mode))
3558 return -EOPNOTSUPP; 3484 return -EOPNOTSUPP;
3559 3485
3560 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 3486 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3561 /* TODO: Add support for non extent hole punching */ 3487 return ext4_ind_punch_hole(file, offset, length);
3562 return -EOPNOTSUPP;
3563 }
3564 3488
3565 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { 3489 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3566 /* TODO: Add support for bigalloc file systems */ 3490 /* TODO: Add support for bigalloc file systems */
3567 return -EOPNOTSUPP; 3491 return -EOPNOTSUPP;
3568 } 3492 }
3569 3493
3494 trace_ext4_punch_hole(inode, offset, length);
3495
3570 return ext4_ext_punch_hole(file, offset, length); 3496 return ext4_ext_punch_hole(file, offset, length);
3571} 3497}
3572 3498
@@ -3660,11 +3586,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
3660 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); 3586 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3661 3587
3662 bh = sb_getblk(sb, block); 3588 bh = sb_getblk(sb, block);
3663 if (!bh) { 3589 if (unlikely(!bh))
3664 EXT4_ERROR_INODE_BLOCK(inode, block, 3590 return -ENOMEM;
3665 "unable to read itable block");
3666 return -EIO;
3667 }
3668 if (!buffer_uptodate(bh)) { 3591 if (!buffer_uptodate(bh)) {
3669 lock_buffer(bh); 3592 lock_buffer(bh);
3670 3593
@@ -3696,7 +3619,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
3696 3619
3697 /* Is the inode bitmap in cache? */ 3620 /* Is the inode bitmap in cache? */
3698 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); 3621 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3699 if (!bitmap_bh) 3622 if (unlikely(!bitmap_bh))
3700 goto make_io; 3623 goto make_io;
3701 3624
3702 /* 3625 /*
@@ -4404,8 +4327,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4404 4327
4405 /* (user+group)*(old+new) structure, inode write (sb, 4328 /* (user+group)*(old+new) structure, inode write (sb,
4406 * inode block, ? - but truncate inode update has it) */ 4329 * inode block, ? - but truncate inode update has it) */
4407 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ 4330 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4408 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); 4331 (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
4332 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
4409 if (IS_ERR(handle)) { 4333 if (IS_ERR(handle)) {
4410 error = PTR_ERR(handle); 4334 error = PTR_ERR(handle);
4411 goto err_out; 4335 goto err_out;
@@ -4440,7 +4364,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4440 (attr->ia_size < inode->i_size)) { 4364 (attr->ia_size < inode->i_size)) {
4441 handle_t *handle; 4365 handle_t *handle;
4442 4366
4443 handle = ext4_journal_start(inode, 3); 4367 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4444 if (IS_ERR(handle)) { 4368 if (IS_ERR(handle)) {
4445 error = PTR_ERR(handle); 4369 error = PTR_ERR(handle);
4446 goto err_out; 4370 goto err_out;
@@ -4460,7 +4384,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4460 attr->ia_size); 4384 attr->ia_size);
4461 if (error) { 4385 if (error) {
4462 /* Do as much error cleanup as possible */ 4386 /* Do as much error cleanup as possible */
4463 handle = ext4_journal_start(inode, 3); 4387 handle = ext4_journal_start(inode,
4388 EXT4_HT_INODE, 3);
4464 if (IS_ERR(handle)) { 4389 if (IS_ERR(handle)) {
4465 ext4_orphan_del(NULL, inode); 4390 ext4_orphan_del(NULL, inode);
4466 goto err_out; 4391 goto err_out;
@@ -4801,7 +4726,7 @@ void ext4_dirty_inode(struct inode *inode, int flags)
4801{ 4726{
4802 handle_t *handle; 4727 handle_t *handle;
4803 4728
4804 handle = ext4_journal_start(inode, 2); 4729 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4805 if (IS_ERR(handle)) 4730 if (IS_ERR(handle))
4806 goto out; 4731 goto out;
4807 4732
@@ -4902,7 +4827,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4902 4827
4903 /* Finally we can mark the inode as dirty. */ 4828 /* Finally we can mark the inode as dirty. */
4904 4829
4905 handle = ext4_journal_start(inode, 1); 4830 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
4906 if (IS_ERR(handle)) 4831 if (IS_ERR(handle))
4907 return PTR_ERR(handle); 4832 return PTR_ERR(handle);
4908 4833
@@ -4926,7 +4851,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4926 unsigned long len; 4851 unsigned long len;
4927 int ret; 4852 int ret;
4928 struct file *file = vma->vm_file; 4853 struct file *file = vma->vm_file;
4929 struct inode *inode = file->f_path.dentry->d_inode; 4854 struct inode *inode = file_inode(file);
4930 struct address_space *mapping = inode->i_mapping; 4855 struct address_space *mapping = inode->i_mapping;
4931 handle_t *handle; 4856 handle_t *handle;
4932 get_block_t *get_block; 4857 get_block_t *get_block;
@@ -4968,7 +4893,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4893 0, len, NULL,
4969 ext4_bh_unmapped)) { 4894 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4895 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4896 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4897 ret = VM_FAULT_LOCKED;
4973 goto out; 4898 goto out;
4974 } 4899 }
@@ -4980,7 +4905,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4980 else 4905 else
4981 get_block = ext4_get_block; 4906 get_block = ext4_get_block;
4982retry_alloc: 4907retry_alloc:
4983 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 4908 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
4909 ext4_writepage_trans_blocks(inode));
4984 if (IS_ERR(handle)) { 4910 if (IS_ERR(handle)) {
4985 ret = VM_FAULT_SIGBUS; 4911 ret = VM_FAULT_SIGBUS;
4986 goto out; 4912 goto out;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5747f52f7c72..721f4d33e148 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -22,7 +22,7 @@
22 22
23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24{ 24{
25 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = file_inode(filp);
26 struct super_block *sb = inode->i_sb; 26 struct super_block *sb = inode->i_sb;
27 struct ext4_inode_info *ei = EXT4_I(inode); 27 struct ext4_inode_info *ei = EXT4_I(inode);
28 unsigned int flags; 28 unsigned int flags;
@@ -104,7 +104,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
104 } else if (oldflags & EXT4_EOFBLOCKS_FL) 104 } else if (oldflags & EXT4_EOFBLOCKS_FL)
105 ext4_truncate(inode); 105 ext4_truncate(inode);
106 106
107 handle = ext4_journal_start(inode, 1); 107 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
108 if (IS_ERR(handle)) { 108 if (IS_ERR(handle)) {
109 err = PTR_ERR(handle); 109 err = PTR_ERR(handle);
110 goto flags_out; 110 goto flags_out;
@@ -173,7 +173,7 @@ flags_out:
173 } 173 }
174 174
175 mutex_lock(&inode->i_mutex); 175 mutex_lock(&inode->i_mutex);
176 handle = ext4_journal_start(inode, 1); 176 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
177 if (IS_ERR(handle)) { 177 if (IS_ERR(handle)) {
178 err = PTR_ERR(handle); 178 err = PTR_ERR(handle);
179 goto unlock_out; 179 goto unlock_out;
@@ -313,6 +313,9 @@ mext_out:
313 if (err == 0) 313 if (err == 0)
314 err = err2; 314 err = err2;
315 mnt_drop_write_file(filp); 315 mnt_drop_write_file(filp);
316 if (!err && ext4_has_group_desc_csum(sb) &&
317 test_opt(sb, INIT_INODE_TABLE))
318 err = ext4_register_li_request(sb, input.group);
316group_add_out: 319group_add_out:
317 ext4_resize_end(sb); 320 ext4_resize_end(sb);
318 return err; 321 return err;
@@ -358,6 +361,7 @@ group_add_out:
358 ext4_fsblk_t n_blocks_count; 361 ext4_fsblk_t n_blocks_count;
359 struct super_block *sb = inode->i_sb; 362 struct super_block *sb = inode->i_sb;
360 int err = 0, err2 = 0; 363 int err = 0, err2 = 0;
364 ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
361 365
362 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 366 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
363 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 367 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
@@ -388,6 +392,11 @@ group_add_out:
388 if (err == 0) 392 if (err == 0)
389 err = err2; 393 err = err2;
390 mnt_drop_write_file(filp); 394 mnt_drop_write_file(filp);
395 if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
396 ext4_has_group_desc_csum(sb) &&
397 test_opt(sb, INIT_INODE_TABLE))
398 err = ext4_register_li_request(sb, o_group);
399
391resizefs_out: 400resizefs_out:
392 ext4_resize_end(sb); 401 ext4_resize_end(sb);
393 return err; 402 return err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1bf6fe785c4f..6540ebe058e3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,11 +23,18 @@
23 23
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/log2.h> 26#include <linux/log2.h>
27#include <linux/module.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
30 30
31#ifdef CONFIG_EXT4_DEBUG
32ushort ext4_mballoc_debug __read_mostly;
33
34module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
35MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
36#endif
37
31/* 38/*
32 * MUSTDO: 39 * MUSTDO:
33 * - test ext4_ext_search_left() and ext4_ext_search_right() 40 * - test ext4_ext_search_left() and ext4_ext_search_right()
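In mballoc.c the debug knob becomes a module parameter above (the debugfs file it replaces is deleted further down), so it can be flipped through /sys/module or, with ext4 built in, on the kernel command line. A minimal self-contained module sketch of the same module_param_named() idiom; this is a demo module, not ext4 code:

#include <linux/module.h>
#include <linux/kernel.h>

static ushort demo_debug;       /* 0 = quiet, higher = more verbose */

module_param_named(debug, demo_debug, ushort, 0644);
MODULE_PARM_DESC(debug, "Debugging level for the demo module");

static int __init demo_init(void)
{
        if (demo_debug)
                pr_info("demo: loaded with debug level %u\n", demo_debug);
        return 0;
}

static void __exit demo_exit(void)
{
        pr_info("demo: unloaded\n");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because of the 0644 permission, the value can also be read or changed at runtime through /sys/module/<module>/parameters/debug.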
@@ -1884,15 +1891,19 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1884 case 0: 1891 case 0:
1885 BUG_ON(ac->ac_2order == 0); 1892 BUG_ON(ac->ac_2order == 0);
1886 1893
1887 if (grp->bb_largest_free_order < ac->ac_2order)
1888 return 0;
1889
1890 /* Avoid using the first bg of a flexgroup for data files */ 1894 /* Avoid using the first bg of a flexgroup for data files */
1891 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1895 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1892 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && 1896 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1893 ((group % flex_size) == 0)) 1897 ((group % flex_size) == 0))
1894 return 0; 1898 return 0;
1895 1899
1900 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
1901 (free / fragments) >= ac->ac_g_ex.fe_len)
1902 return 1;
1903
1904 if (grp->bb_largest_free_order < ac->ac_2order)
1905 return 0;
1906
1896 return 1; 1907 return 1;
1897 case 1: 1908 case 1:
1898 if ((free / fragments) >= ac->ac_g_ex.fe_len) 1909 if ((free / fragments) >= ac->ac_g_ex.fe_len)
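The reordered cr == 0 checks above accept a group when the requested order is larger than the buddy data can describe or when the group's average free fragment already covers the goal, and only then require bb_largest_free_order to reach the request order. A hedged userspace sketch of that decision order, with simplified field names and an added divide-by-zero guard:

#include <stdio.h>
#include <stdbool.h>

struct group_info {
        int largest_free_order; /* log2 of the biggest buddy chunk */
        long free;              /* free blocks in the group */
        long fragments;         /* number of free fragments */
};

static bool good_group_cr0(const struct group_info *g, int req_order,
                           long goal_len, int blocksize_bits)
{
        /* Buddy data cannot describe orders this large; just try the group. */
        if (req_order > blocksize_bits + 1)
                return true;
        /* Average free fragment already big enough for the goal. */
        if (g->fragments && g->free / g->fragments >= goal_len)
                return true;
        return g->largest_free_order >= req_order;
}

int main(void)
{
        struct group_info g = { .largest_free_order = 6, .free = 4096,
                                .fragments = 16 };

        printf("good for order-8 request: %d\n",
               good_group_cr0(&g, 8, 256, 12));
        return 0;
}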
@@ -2007,7 +2018,7 @@ repeat:
2007 } 2018 }
2008 2019
2009 ac->ac_groups_scanned++; 2020 ac->ac_groups_scanned++;
2010 if (cr == 0) 2021 if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
2011 ext4_mb_simple_scan_group(ac, &e4b); 2022 ext4_mb_simple_scan_group(ac, &e4b);
2012 else if (cr == 1 && sbi->s_stripe && 2023 else if (cr == 1 && sbi->s_stripe &&
2013 !(ac->ac_g_ex.fe_len % sbi->s_stripe)) 2024 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
@@ -2656,40 +2667,6 @@ static void ext4_free_data_callback(struct super_block *sb,
2656 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2667 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2657} 2668}
2658 2669
2659#ifdef CONFIG_EXT4_DEBUG
2660u8 mb_enable_debug __read_mostly;
2661
2662static struct dentry *debugfs_dir;
2663static struct dentry *debugfs_debug;
2664
2665static void __init ext4_create_debugfs_entry(void)
2666{
2667 debugfs_dir = debugfs_create_dir("ext4", NULL);
2668 if (debugfs_dir)
2669 debugfs_debug = debugfs_create_u8("mballoc-debug",
2670 S_IRUGO | S_IWUSR,
2671 debugfs_dir,
2672 &mb_enable_debug);
2673}
2674
2675static void ext4_remove_debugfs_entry(void)
2676{
2677 debugfs_remove(debugfs_debug);
2678 debugfs_remove(debugfs_dir);
2679}
2680
2681#else
2682
2683static void __init ext4_create_debugfs_entry(void)
2684{
2685}
2686
2687static void ext4_remove_debugfs_entry(void)
2688{
2689}
2690
2691#endif
2692
2693int __init ext4_init_mballoc(void) 2670int __init ext4_init_mballoc(void)
2694{ 2671{
2695 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, 2672 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -2711,7 +2688,6 @@ int __init ext4_init_mballoc(void)
2711 kmem_cache_destroy(ext4_ac_cachep); 2688 kmem_cache_destroy(ext4_ac_cachep);
2712 return -ENOMEM; 2689 return -ENOMEM;
2713 } 2690 }
2714 ext4_create_debugfs_entry();
2715 return 0; 2691 return 0;
2716} 2692}
2717 2693
@@ -2726,7 +2702,6 @@ void ext4_exit_mballoc(void)
2726 kmem_cache_destroy(ext4_ac_cachep); 2702 kmem_cache_destroy(ext4_ac_cachep);
2727 kmem_cache_destroy(ext4_free_data_cachep); 2703 kmem_cache_destroy(ext4_free_data_cachep);
2728 ext4_groupinfo_destroy_slabs(); 2704 ext4_groupinfo_destroy_slabs();
2729 ext4_remove_debugfs_entry();
2730} 2705}
2731 2706
2732 2707
@@ -3872,7 +3847,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3872 struct super_block *sb = ac->ac_sb; 3847 struct super_block *sb = ac->ac_sb;
3873 ext4_group_t ngroups, i; 3848 ext4_group_t ngroups, i;
3874 3849
3875 if (!mb_enable_debug || 3850 if (!ext4_mballoc_debug ||
3876 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3851 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3877 return; 3852 return;
3878 3853
@@ -4005,8 +3980,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4005 len = ar->len; 3980 len = ar->len;
4006 3981
4007 /* just a dirty hack to filter too big requests */ 3982 /* just a dirty hack to filter too big requests */
4008 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10) 3983 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4009 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10; 3984 len = EXT4_CLUSTERS_PER_GROUP(sb);
4010 3985
4011 /* start searching from the goal */ 3986 /* start searching from the goal */
4012 goal = ar->goal; 3987 goal = ar->goal;
@@ -4136,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4136 /* The max size of hash table is PREALLOC_TB_SIZE */ 4111 /* The max size of hash table is PREALLOC_TB_SIZE */
4137 order = PREALLOC_TB_SIZE - 1; 4112 order = PREALLOC_TB_SIZE - 1;
4138 /* Add the prealloc space to lg */ 4113 /* Add the prealloc space to lg */
4139 rcu_read_lock(); 4114 spin_lock(&lg->lg_prealloc_lock);
4140 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], 4115 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4141 pa_inode_list) { 4116 pa_inode_list) {
4142 spin_lock(&tmp_pa->pa_lock); 4117 spin_lock(&tmp_pa->pa_lock);
@@ -4160,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4160 if (!added) 4135 if (!added)
4161 list_add_tail_rcu(&pa->pa_inode_list, 4136 list_add_tail_rcu(&pa->pa_inode_list,
4162 &lg->lg_prealloc_list[order]); 4137 &lg->lg_prealloc_list[order]);
4163 rcu_read_unlock(); 4138 spin_unlock(&lg->lg_prealloc_lock);
4164 4139
4165 /* Now trim the list to be not more than 8 elements */ 4140 /* Now trim the list to be not more than 8 elements */
4166 if (lg_prealloc_count > 8) { 4141 if (lg_prealloc_count > 8) {
4167 ext4_mb_discard_lg_preallocations(sb, lg, 4142 ext4_mb_discard_lg_preallocations(sb, lg,
4168 order, lg_prealloc_count); 4143 order, lg_prealloc_count);
4169 return; 4144 return;
4170 } 4145 }
4171 return ; 4146 return ;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 3ccd889ba953..08481ee84cd5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -37,11 +37,11 @@
37/* 37/*
38 */ 38 */
39#ifdef CONFIG_EXT4_DEBUG 39#ifdef CONFIG_EXT4_DEBUG
40extern u8 mb_enable_debug; 40extern ushort ext4_mballoc_debug;
41 41
42#define mb_debug(n, fmt, a...) \ 42#define mb_debug(n, fmt, a...) \
43 do { \ 43 do { \
44 if ((n) <= mb_enable_debug) { \ 44 if ((n) <= ext4_mballoc_debug) { \
45 printk(KERN_DEBUG "(%s, %d): %s: ", \ 45 printk(KERN_DEBUG "(%s, %d): %s: ", \
46 __FILE__, __LINE__, __func__); \ 46 __FILE__, __LINE__, __func__); \
47 printk(fmt, ## a); \ 47 printk(fmt, ## a); \
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index db8226d595fa..480acf4a085f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -456,11 +456,14 @@ int ext4_ext_migrate(struct inode *inode)
456 */ 456 */
457 return retval; 457 return retval;
458 458
459 handle = ext4_journal_start(inode, 459 /*
460 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 460 * Worst case we can touch the allocation bitmaps, a bgd
461 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 461 * block, and a block to link in the orphan list. We do need
462 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) 462 * need to worry about credits for modifying the quota inode.
463 + 1); 463 */
464 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
465 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
466
464 if (IS_ERR(handle)) { 467 if (IS_ERR(handle)) {
465 retval = PTR_ERR(handle); 468 retval = PTR_ERR(handle);
466 return retval; 469 return retval;
@@ -507,7 +510,7 @@ int ext4_ext_migrate(struct inode *inode)
507 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 510 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
508 up_read((&EXT4_I(inode)->i_data_sem)); 511 up_read((&EXT4_I(inode)->i_data_sem));
509 512
510 handle = ext4_journal_start(inode, 1); 513 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
511 if (IS_ERR(handle)) { 514 if (IS_ERR(handle)) {
512 /* 515 /*
513 * It is impossible to update on-disk structures without 516 * It is impossible to update on-disk structures without
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index fe7c63f4717e..f9b551561d2c 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -80,6 +80,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
80 * is not blocked in the elevator. */ 80 * is not blocked in the elevator. */
81 if (!*bh) 81 if (!*bh)
82 *bh = sb_getblk(sb, mmp_block); 82 *bh = sb_getblk(sb, mmp_block);
83 if (!*bh)
84 return -ENOMEM;
83 if (*bh) { 85 if (*bh) {
84 get_bh(*bh); 86 get_bh(*bh);
85 lock_buffer(*bh); 87 lock_buffer(*bh);
@@ -91,7 +93,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
91 *bh = NULL; 93 *bh = NULL;
92 } 94 }
93 } 95 }
94 if (!*bh) { 96 if (unlikely(!*bh)) {
95 ext4_warning(sb, "Error while reading MMP block %llu", 97 ext4_warning(sb, "Error while reading MMP block %llu",
96 mmp_block); 98 mmp_block);
97 return -EIO; 99 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index d9cc5ee42f53..4e81d47aa8cb 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -681,6 +681,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
681 681
682 depth = ext_depth(donor_inode); 682 depth = ext_depth(donor_inode);
683 dext = donor_path[depth].p_ext; 683 dext = donor_path[depth].p_ext;
684 if (unlikely(!dext))
685 goto missing_donor_extent;
684 tmp_dext = *dext; 686 tmp_dext = *dext;
685 687
686 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 688 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
@@ -691,7 +693,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
691 /* Loop for the donor extents */ 693 /* Loop for the donor extents */
692 while (1) { 694 while (1) {
693 /* The extent for donor must be found. */ 695 /* The extent for donor must be found. */
694 if (!dext) { 696 if (unlikely(!dext)) {
697 missing_donor_extent:
695 EXT4_ERROR_INODE(donor_inode, 698 EXT4_ERROR_INODE(donor_inode,
696 "The extent for donor must be found"); 699 "The extent for donor must be found");
697 *err = -EIO; 700 *err = -EIO;
@@ -761,9 +764,6 @@ out:
761 kfree(donor_path); 764 kfree(donor_path);
762 } 765 }
763 766
764 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode);
766
767 return replaced_count; 767 return replaced_count;
768} 768}
769 769
@@ -900,7 +900,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
900 pgoff_t orig_page_offset, int data_offset_in_page, 900 pgoff_t orig_page_offset, int data_offset_in_page,
901 int block_len_in_page, int uninit, int *err) 901 int block_len_in_page, int uninit, int *err)
902{ 902{
903 struct inode *orig_inode = o_filp->f_dentry->d_inode; 903 struct inode *orig_inode = file_inode(o_filp);
904 struct page *pagep[2] = {NULL, NULL}; 904 struct page *pagep[2] = {NULL, NULL};
905 handle_t *handle; 905 handle_t *handle;
906 ext4_lblk_t orig_blk_offset; 906 ext4_lblk_t orig_blk_offset;
@@ -920,7 +920,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
920again: 920again:
921 *err = 0; 921 *err = 0;
922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
923 handle = ext4_journal_start(orig_inode, jblocks); 923 handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
924 if (IS_ERR(handle)) { 924 if (IS_ERR(handle)) {
925 *err = PTR_ERR(handle); 925 *err = PTR_ERR(handle);
926 return 0; 926 return 0;
@@ -1279,8 +1279,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1279 __u64 orig_start, __u64 donor_start, __u64 len, 1279 __u64 orig_start, __u64 donor_start, __u64 len,
1280 __u64 *moved_len) 1280 __u64 *moved_len)
1281{ 1281{
1282 struct inode *orig_inode = o_filp->f_dentry->d_inode; 1282 struct inode *orig_inode = file_inode(o_filp);
1283 struct inode *donor_inode = d_filp->f_dentry->d_inode; 1283 struct inode *donor_inode = file_inode(d_filp);
1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; 1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1286 ext4_lblk_t block_start = orig_start; 1286 ext4_lblk_t block_start = orig_start;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8990165346ee..3825d6aa8336 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -47,38 +47,111 @@
47#define NAMEI_RA_CHUNKS 2 47#define NAMEI_RA_CHUNKS 2
48#define NAMEI_RA_BLOCKS 4 48#define NAMEI_RA_BLOCKS 4
49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
51 50
52static struct buffer_head *ext4_append(handle_t *handle, 51static struct buffer_head *ext4_append(handle_t *handle,
53 struct inode *inode, 52 struct inode *inode,
54 ext4_lblk_t *block, int *err) 53 ext4_lblk_t *block)
55{ 54{
56 struct buffer_head *bh; 55 struct buffer_head *bh;
56 int err = 0;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && 58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >= 59 ((inode->i_size >> 10) >=
60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { 60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
61 *err = -ENOSPC; 61 return ERR_PTR(-ENOSPC);
62 return NULL;
63 }
64 62
65 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
66 64
67 bh = ext4_bread(handle, inode, *block, 1, err); 65 bh = ext4_bread(handle, inode, *block, 1, &err);
68 if (bh) { 66 if (!bh)
69 inode->i_size += inode->i_sb->s_blocksize; 67 return ERR_PTR(err);
70 EXT4_I(inode)->i_disksize = inode->i_size; 68 inode->i_size += inode->i_sb->s_blocksize;
71 *err = ext4_journal_get_write_access(handle, bh); 69 EXT4_I(inode)->i_disksize = inode->i_size;
72 if (*err) { 70 err = ext4_journal_get_write_access(handle, bh);
71 if (err) {
72 brelse(bh);
73 ext4_std_error(inode->i_sb, err);
74 return ERR_PTR(err);
75 }
76 return bh;
77}
78
79static int ext4_dx_csum_verify(struct inode *inode,
80 struct ext4_dir_entry *dirent);
81
82typedef enum {
83 EITHER, INDEX, DIRENT
84} dirblock_type_t;
85
86#define ext4_read_dirblock(inode, block, type) \
87 __ext4_read_dirblock((inode), (block), (type), __LINE__)
88
89static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
90 ext4_lblk_t block,
91 dirblock_type_t type,
92 unsigned int line)
93{
94 struct buffer_head *bh;
95 struct ext4_dir_entry *dirent;
96 int err = 0, is_dx_block = 0;
97
98 bh = ext4_bread(NULL, inode, block, 0, &err);
99 if (!bh) {
100 if (err == 0) {
101 ext4_error_inode(inode, __func__, line, block,
102 "Directory hole found");
103 return ERR_PTR(-EIO);
104 }
105 __ext4_warning(inode->i_sb, __func__, line,
106 "error reading directory block "
107 "(ino %lu, block %lu)", inode->i_ino,
108 (unsigned long) block);
109 return ERR_PTR(err);
110 }
111 dirent = (struct ext4_dir_entry *) bh->b_data;
112 /* Determine whether or not we have an index block */
113 if (is_dx(inode)) {
114 if (block == 0)
115 is_dx_block = 1;
116 else if (ext4_rec_len_from_disk(dirent->rec_len,
117 inode->i_sb->s_blocksize) ==
118 inode->i_sb->s_blocksize)
119 is_dx_block = 1;
120 }
121 if (!is_dx_block && type == INDEX) {
122 ext4_error_inode(inode, __func__, line, block,
123 "directory leaf block found instead of index block");
124 return ERR_PTR(-EIO);
125 }
126 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
128 buffer_verified(bh))
129 return bh;
130
131 /*
132 * An empty leaf block can get mistaken for a index block; for
133 * this reason, we can only check the index checksum when the
134 * caller is sure it should be an index block.
135 */
136 if (is_dx_block && type == INDEX) {
137 if (ext4_dx_csum_verify(inode, dirent))
138 set_buffer_verified(bh);
139 else {
140 ext4_error_inode(inode, __func__, line, block,
141 "Directory index failed checksum");
73 brelse(bh); 142 brelse(bh);
74 bh = NULL; 143 return ERR_PTR(-EIO);
75 } 144 }
76 } 145 }
77 if (!bh && !(*err)) { 146 if (!is_dx_block) {
78 *err = -EIO; 147 if (ext4_dirent_csum_verify(inode, dirent))
79 ext4_error(inode->i_sb, 148 set_buffer_verified(bh);
80 "Directory hole detected on inode %lu\n", 149 else {
81 inode->i_ino); 150 ext4_error_inode(inode, __func__, line, block,
151 "Directory block failed checksum");
152 brelse(bh);
153 return ERR_PTR(-EIO);
154 }
82 } 155 }
83 return bh; 156 return bh;
84} 157}
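Both ext4_append() and the new __ext4_read_dirblock() above report failure through ERR_PTR() instead of a separate int *err out-parameter, so callers only need an IS_ERR() check. A freestanding userspace re-implementation of that encoding, purely to show the calling convention; the kernel's real helpers live in include/linux/err.h:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
static inline void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
static inline long PTR_ERR(const void *p)  { return (long)(intptr_t)p; }
static inline int IS_ERR(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-4095;
}

/* A read helper that fails the way ext4_read_dirblock() does on a bad block. */
static void *read_block(int blocknr)
{
        if (blocknr < 0)
                return ERR_PTR(-EIO);
        return malloc(4096);
}

int main(void)
{
        void *buf = read_block(-1);

        if (IS_ERR(buf)) {
                printf("read_block failed: %ld\n", PTR_ERR(buf));
                return 1;
        }
        free(buf);
        return 0;
}

Encoding the error in the pointer keeps success and failure in one return value, which is why the callers in the hunk above collapse down to a single IS_ERR() test.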
@@ -604,9 +677,9 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
604 u32 hash; 677 u32 hash;
605 678
606 frame->bh = NULL; 679 frame->bh = NULL;
607 if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { 680 bh = ext4_read_dirblock(dir, 0, INDEX);
608 if (*err == 0) 681 if (IS_ERR(bh)) {
609 *err = ERR_BAD_DX_DIR; 682 *err = PTR_ERR(bh);
610 goto fail; 683 goto fail;
611 } 684 }
612 root = (struct dx_root *) bh->b_data; 685 root = (struct dx_root *) bh->b_data;
@@ -643,15 +716,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
643 goto fail; 716 goto fail;
644 } 717 }
645 718
646 if (!buffer_verified(bh) &&
647 !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
648 ext4_warning(dir->i_sb, "Root failed checksum");
649 brelse(bh);
650 *err = ERR_BAD_DX_DIR;
651 goto fail;
652 }
653 set_buffer_verified(bh);
654
655 entries = (struct dx_entry *) (((char *)&root->info) + 719 entries = (struct dx_entry *) (((char *)&root->info) +
656 root->info.info_length); 720 root->info.info_length);
657 721
@@ -709,22 +773,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
709 frame->entries = entries; 773 frame->entries = entries;
710 frame->at = at; 774 frame->at = at;
711 if (!indirect--) return frame; 775 if (!indirect--) return frame;
712 if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { 776 bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
713 if (!(*err)) 777 if (IS_ERR(bh)) {
714 *err = ERR_BAD_DX_DIR; 778 *err = PTR_ERR(bh);
715 goto fail2; 779 goto fail2;
716 } 780 }
717 at = entries = ((struct dx_node *) bh->b_data)->entries; 781 entries = ((struct dx_node *) bh->b_data)->entries;
718
719 if (!buffer_verified(bh) &&
720 !ext4_dx_csum_verify(dir,
721 (struct ext4_dir_entry *)bh->b_data)) {
722 ext4_warning(dir->i_sb, "Node failed checksum");
723 brelse(bh);
724 *err = ERR_BAD_DX_DIR;
725 goto fail;
726 }
727 set_buffer_verified(bh);
728 782
729 if (dx_get_limit(entries) != dx_node_limit (dir)) { 783 if (dx_get_limit(entries) != dx_node_limit (dir)) {
730 ext4_warning(dir->i_sb, 784 ext4_warning(dir->i_sb,
@@ -783,7 +837,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
783{ 837{
784 struct dx_frame *p; 838 struct dx_frame *p;
785 struct buffer_head *bh; 839 struct buffer_head *bh;
 786 int err, num_frames = 0;
 840 int num_frames = 0;
787 __u32 bhash; 841 __u32 bhash;
788 842
789 p = frame; 843 p = frame;
@@ -822,25 +876,9 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
822 * block so no check is necessary 876 * block so no check is necessary
823 */ 877 */
824 while (num_frames--) { 878 while (num_frames--) {
825 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 879 bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
826 0, &err))) { 880 if (IS_ERR(bh))
827 if (!err) { 881 return PTR_ERR(bh);
828 ext4_error(dir->i_sb,
829 "Directory hole detected on inode %lu\n",
830 dir->i_ino);
831 return -EIO;
832 }
833 return err; /* Failure */
834 }
835
836 if (!buffer_verified(bh) &&
837 !ext4_dx_csum_verify(dir,
838 (struct ext4_dir_entry *)bh->b_data)) {
839 ext4_warning(dir->i_sb, "Node failed checksum");
840 return -EIO;
841 }
842 set_buffer_verified(bh);
843
844 p++; 882 p++;
845 brelse(p->bh); 883 brelse(p->bh);
846 p->bh = bh; 884 p->bh = bh;
@@ -866,20 +904,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
866 904
867 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 905 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
868 (unsigned long)block)); 906 (unsigned long)block));
869 if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { 907 bh = ext4_read_dirblock(dir, block, DIRENT);
870 if (!err) { 908 if (IS_ERR(bh))
871 err = -EIO; 909 return PTR_ERR(bh);
872 ext4_error(dir->i_sb,
873 "Directory hole detected on inode %lu\n",
874 dir->i_ino);
875 }
876 return err;
877 }
878
879 if (!buffer_verified(bh) &&
880 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
881 return -EIO;
882 set_buffer_verified(bh);
883 910
884 de = (struct ext4_dir_entry_2 *) bh->b_data; 911 de = (struct ext4_dir_entry_2 *) bh->b_data;
885 top = (struct ext4_dir_entry_2 *) ((char *) de + 912 top = (struct ext4_dir_entry_2 *) ((char *) de +
@@ -937,7 +964,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
937 964
938 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 965 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
939 start_hash, start_minor_hash)); 966 start_hash, start_minor_hash));
 940 dir = dir_file->f_path.dentry->d_inode;
 967 dir = file_inode(dir_file);
941 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { 968 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
942 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 969 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
943 if (hinfo.hash_version <= DX_HASH_TEA) 970 if (hinfo.hash_version <= DX_HASH_TEA)
@@ -1333,26 +1360,11 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1333 return NULL; 1360 return NULL;
1334 do { 1361 do {
1335 block = dx_get_block(frame->at); 1362 block = dx_get_block(frame->at);
1336 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { 1363 bh = ext4_read_dirblock(dir, block, DIRENT);
1337 if (!(*err)) { 1364 if (IS_ERR(bh)) {
1338 *err = -EIO; 1365 *err = PTR_ERR(bh);
1339 ext4_error(dir->i_sb,
1340 "Directory hole detected on inode %lu\n",
1341 dir->i_ino);
1342 }
1343 goto errout;
1344 }
1345
1346 if (!buffer_verified(bh) &&
1347 !ext4_dirent_csum_verify(dir,
1348 (struct ext4_dir_entry *)bh->b_data)) {
1349 EXT4_ERROR_INODE(dir, "checksumming directory "
1350 "block %lu", (unsigned long)block);
1351 brelse(bh);
1352 *err = -EIO;
1353 goto errout; 1366 goto errout;
1354 } 1367 }
1355 set_buffer_verified(bh);
1356 retval = search_dirblock(bh, dir, d_name, 1368 retval = search_dirblock(bh, dir, d_name,
1357 block << EXT4_BLOCK_SIZE_BITS(sb), 1369 block << EXT4_BLOCK_SIZE_BITS(sb),
1358 res_dir); 1370 res_dir);
@@ -1536,11 +1548,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1536 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1548 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1537 csum_size = sizeof(struct ext4_dir_entry_tail); 1549 csum_size = sizeof(struct ext4_dir_entry_tail);
1538 1550
 1539 bh2 = ext4_append (handle, dir, &newblock, &err);
 1540 if (!(bh2)) {
 1541 brelse(*bh);
 1542 *bh = NULL;
 1543 goto errout;
 1551 bh2 = ext4_append(handle, dir, &newblock);
 1552 if (IS_ERR(bh2)) {
 1553 brelse(*bh);
 1554 *bh = NULL;
 1555 *error = PTR_ERR(bh2);
 1556 return NULL;
1544 } 1557 }
1545 1558
1546 BUFFER_TRACE(*bh, "get_write_access"); 1559 BUFFER_TRACE(*bh, "get_write_access");
@@ -1621,7 +1634,6 @@ journal_error:
1621 brelse(bh2); 1634 brelse(bh2);
1622 *bh = NULL; 1635 *bh = NULL;
1623 ext4_std_error(dir->i_sb, err); 1636 ext4_std_error(dir->i_sb, err);
1624errout:
1625 *error = err; 1637 *error = err;
1626 return NULL; 1638 return NULL;
1627} 1639}
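
do_split() above, and make_indexed_dir(), ext4_add_entry() and ext4_dx_add_entry() below, now take the new block from ext4_append() as either a buffer_head or an ERR_PTR() value instead of NULL plus an errno written through a pointer argument. The standalone snippet below illustrates the encoding trick itself; it is not kernel code, just a user-space rendering of what ERR_PTR()/IS_ERR()/PTR_ERR() do, under the usual assumption that the top 4095 values of the address space never hold a valid pointer.

#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a negative errno inside a pointer, as the kernel's ERR_PTR() does. */
static void *err_ptr(long error) { return (void *)error; }
static long ptr_err(const void *ptr) { return (long)ptr; }
static int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *get_block(int fail)
{
	static int block = 42;
	return fail ? err_ptr(-28 /* ENOSPC */) : &block;
}

int main(void)
{
	void *p = get_block(1);

	if (is_err(p))
		printf("allocation failed: errno %ld\n", -ptr_err(p));
	else
		printf("got block %d\n", *(int *)p);
	return 0;
}
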
@@ -1699,7 +1711,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1699 const char *name = dentry->d_name.name; 1711 const char *name = dentry->d_name.name;
1700 int namelen = dentry->d_name.len; 1712 int namelen = dentry->d_name.len;
1701 unsigned int blocksize = dir->i_sb->s_blocksize; 1713 unsigned int blocksize = dir->i_sb->s_blocksize;
1702 unsigned short reclen;
1703 int csum_size = 0; 1714 int csum_size = 0;
1704 int err; 1715 int err;
1705 1716
@@ -1707,7 +1718,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1707 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1718 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1708 csum_size = sizeof(struct ext4_dir_entry_tail); 1719 csum_size = sizeof(struct ext4_dir_entry_tail);
1709 1720
1710 reclen = EXT4_DIR_REC_LEN(namelen);
1711 if (!de) { 1721 if (!de) {
1712 err = ext4_find_dest_de(dir, inode, 1722 err = ext4_find_dest_de(dir, inode,
1713 bh, bh->b_data, blocksize - csum_size, 1723 bh, bh->b_data, blocksize - csum_size,
@@ -1798,10 +1808,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1798 len = ((char *) root) + (blocksize - csum_size) - (char *) de; 1808 len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1799 1809
1800 /* Allocate new block for the 0th block's dirents */ 1810 /* Allocate new block for the 0th block's dirents */
1801 bh2 = ext4_append(handle, dir, &block, &retval); 1811 bh2 = ext4_append(handle, dir, &block);
1802 if (!(bh2)) { 1812 if (IS_ERR(bh2)) {
1803 brelse(bh); 1813 brelse(bh);
1804 return retval; 1814 return PTR_ERR(bh2);
1805 } 1815 }
1806 ext4_set_inode_flag(dir, EXT4_INODE_INDEX); 1816 ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1807 data1 = bh2->b_data; 1817 data1 = bh2->b_data;
@@ -1918,20 +1928,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1918 } 1928 }
1919 blocks = dir->i_size >> sb->s_blocksize_bits; 1929 blocks = dir->i_size >> sb->s_blocksize_bits;
1920 for (block = 0; block < blocks; block++) { 1930 for (block = 0; block < blocks; block++) {
 1921 if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
 1922 if (!retval) {
 1923 retval = -EIO;
 1924 ext4_error(inode->i_sb,
 1925 "Directory hole detected on inode %lu\n",
 1926 inode->i_ino);
 1927 }
 1928 return retval;
 1929 }
 1930 if (!buffer_verified(bh) &&
 1931 !ext4_dirent_csum_verify(dir,
 1932 (struct ext4_dir_entry *)bh->b_data))
 1933 return -EIO;
 1934 set_buffer_verified(bh);
 1931 bh = ext4_read_dirblock(dir, block, DIRENT);
 1932 if (IS_ERR(bh))
 1933 return PTR_ERR(bh);
 1934
1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1936 if (retval != -ENOSPC) { 1936 if (retval != -ENOSPC) {
1937 brelse(bh); 1937 brelse(bh);
@@ -1943,9 +1943,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1943 return make_indexed_dir(handle, dentry, inode, bh); 1943 return make_indexed_dir(handle, dentry, inode, bh);
1944 brelse(bh); 1944 brelse(bh);
1945 } 1945 }
 1946 bh = ext4_append(handle, dir, &block, &retval);
 1947 if (!bh)
 1948 return retval;
 1946 bh = ext4_append(handle, dir, &block);
 1947 if (IS_ERR(bh))
 1948 return PTR_ERR(bh);
1949 de = (struct ext4_dir_entry_2 *) bh->b_data; 1949 de = (struct ext4_dir_entry_2 *) bh->b_data;
1950 de->inode = 0; 1950 de->inode = 0;
1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); 1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
@@ -1982,22 +1982,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1982 return err; 1982 return err;
1983 entries = frame->entries; 1983 entries = frame->entries;
1984 at = frame->at; 1984 at = frame->at;
 1985
 1986 if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
 1987 if (!err) {
 1988 err = -EIO;
 1989 ext4_error(dir->i_sb,
 1990 "Directory hole detected on inode %lu\n",
 1991 dir->i_ino);
 1992 }
 1985 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
 1986 if (IS_ERR(bh)) {
 1987 err = PTR_ERR(bh);
 1988 bh = NULL;
1993 goto cleanup; 1989 goto cleanup;
1994 } 1990 }
1995 1991
1996 if (!buffer_verified(bh) &&
1997 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
1998 goto journal_error;
1999 set_buffer_verified(bh);
2000
2001 BUFFER_TRACE(bh, "get_write_access"); 1992 BUFFER_TRACE(bh, "get_write_access");
2002 err = ext4_journal_get_write_access(handle, bh); 1993 err = ext4_journal_get_write_access(handle, bh);
2003 if (err) 1994 if (err)
@@ -2025,9 +2016,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2025 err = -ENOSPC; 2016 err = -ENOSPC;
2026 goto cleanup; 2017 goto cleanup;
2027 } 2018 }
 2028 bh2 = ext4_append (handle, dir, &newblock, &err);
 2029 if (!(bh2))
 2030 goto cleanup;
 2019 bh2 = ext4_append(handle, dir, &newblock);
 2020 if (IS_ERR(bh2)) {
 2021 err = PTR_ERR(bh2);
 2022 goto cleanup;
 2023 }
2031 node2 = (struct dx_node *)(bh2->b_data); 2024 node2 = (struct dx_node *)(bh2->b_data);
2032 entries2 = node2->entries; 2025 entries2 = node2->entries;
2033 memset(&node2->fake, 0, sizeof(struct fake_dirent)); 2026 memset(&node2->fake, 0, sizeof(struct fake_dirent));
@@ -2106,8 +2099,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2106journal_error: 2099journal_error:
2107 ext4_std_error(dir->i_sb, err); 2100 ext4_std_error(dir->i_sb, err);
2108cleanup: 2101cleanup:
2109 if (bh) 2102 brelse(bh);
2110 brelse(bh);
2111 dx_release(frames); 2103 dx_release(frames);
2112 return err; 2104 return err;
2113} 2105}
@@ -2254,29 +2246,28 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2254{ 2246{
2255 handle_t *handle; 2247 handle_t *handle;
2256 struct inode *inode; 2248 struct inode *inode;
 2257 int err, retries = 0;
 2249 int err, credits, retries = 0;
2258 2250
2259 dquot_initialize(dir); 2251 dquot_initialize(dir);
2260 2252
2253 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2254 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2255 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2261retry: 2256retry:
2262 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2263 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2258 NULL, EXT4_HT_DIR, credits);
2264 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2259 handle = ext4_journal_current_handle();
2265 if (IS_ERR(handle))
2266 return PTR_ERR(handle);
2267
2268 if (IS_DIRSYNC(dir))
2269 ext4_handle_sync(handle);
2270
2271 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2272 err = PTR_ERR(inode); 2260 err = PTR_ERR(inode);
2273 if (!IS_ERR(inode)) { 2261 if (!IS_ERR(inode)) {
2274 inode->i_op = &ext4_file_inode_operations; 2262 inode->i_op = &ext4_file_inode_operations;
2275 inode->i_fop = &ext4_file_operations; 2263 inode->i_fop = &ext4_file_operations;
2276 ext4_set_aops(inode); 2264 ext4_set_aops(inode);
2277 err = ext4_add_nondir(handle, dentry, inode); 2265 err = ext4_add_nondir(handle, dentry, inode);
2266 if (!err && IS_DIRSYNC(dir))
2267 ext4_handle_sync(handle);
2278 } 2268 }
2279 ext4_journal_stop(handle); 2269 if (handle)
2270 ext4_journal_stop(handle);
2280 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2271 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2281 goto retry; 2272 goto retry;
2282 return err; 2273 return err;
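
The creation paths are restructured so that the inode allocator starts the transaction: the credit estimate is computed first, ext4_new_inode_start_handle() allocates the inode and opens an EXT4_HT_DIR handle in one step, the handle is fetched back with ext4_journal_current_handle(), and the DIRSYNC sync now happens only after the directory entry has actually been added. A condensed sketch of that shape (quota initialization, i_op/i_fop setup and other details omitted; example_create() is illustrative, not the literal function):

/* Condensed sketch of the new allocation pattern used by ext4_create(). */
static int example_create(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	handle_t *handle;
	struct inode *inode;
	int err, credits, retries = 0;

	credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
		  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
		  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
retry:
	/* The allocator starts the handle itself ... */
	inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
					    NULL, EXT4_HT_DIR, credits);
	handle = ext4_journal_current_handle();	/* ... and we fetch it back. */
	err = PTR_ERR(inode);
	if (!IS_ERR(inode)) {
		err = ext4_add_nondir(handle, dentry, inode);
		if (!err && IS_DIRSYNC(dir))
			ext4_handle_sync(handle);	/* sync only on success */
	}
	if (handle)
		ext4_journal_stop(handle);
	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
}
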
@@ -2287,31 +2278,30 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2287{ 2278{
2288 handle_t *handle; 2279 handle_t *handle;
2289 struct inode *inode; 2280 struct inode *inode;
 2290 int err, retries = 0;
 2281 int err, credits, retries = 0;
2291 2282
2292 if (!new_valid_dev(rdev)) 2283 if (!new_valid_dev(rdev))
2293 return -EINVAL; 2284 return -EINVAL;
2294 2285
2295 dquot_initialize(dir); 2286 dquot_initialize(dir);
2296 2287
2288 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2289 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2290 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2297retry: 2291retry:
2298 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2292 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2299 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2293 NULL, EXT4_HT_DIR, credits);
2300 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2294 handle = ext4_journal_current_handle();
2301 if (IS_ERR(handle))
2302 return PTR_ERR(handle);
2303
2304 if (IS_DIRSYNC(dir))
2305 ext4_handle_sync(handle);
2306
2307 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2308 err = PTR_ERR(inode); 2295 err = PTR_ERR(inode);
2309 if (!IS_ERR(inode)) { 2296 if (!IS_ERR(inode)) {
2310 init_special_inode(inode, inode->i_mode, rdev); 2297 init_special_inode(inode, inode->i_mode, rdev);
2311 inode->i_op = &ext4_special_inode_operations; 2298 inode->i_op = &ext4_special_inode_operations;
2312 err = ext4_add_nondir(handle, dentry, inode); 2299 err = ext4_add_nondir(handle, dentry, inode);
2300 if (!err && IS_DIRSYNC(dir))
2301 ext4_handle_sync(handle);
2313 } 2302 }
2314 ext4_journal_stop(handle); 2303 if (handle)
2304 ext4_journal_stop(handle);
2315 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2305 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2316 goto retry; 2306 goto retry;
2317 return err; 2307 return err;
@@ -2351,6 +2341,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2351 struct buffer_head *dir_block = NULL; 2341 struct buffer_head *dir_block = NULL;
2352 struct ext4_dir_entry_2 *de; 2342 struct ext4_dir_entry_2 *de;
2353 struct ext4_dir_entry_tail *t; 2343 struct ext4_dir_entry_tail *t;
2344 ext4_lblk_t block = 0;
2354 unsigned int blocksize = dir->i_sb->s_blocksize; 2345 unsigned int blocksize = dir->i_sb->s_blocksize;
2355 int csum_size = 0; 2346 int csum_size = 0;
2356 int err; 2347 int err;
@@ -2367,17 +2358,10 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2367 goto out; 2358 goto out;
2368 } 2359 }
2369 2360
 2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
 2371 dir_block = ext4_bread(handle, inode, 0, 1, &err);
 2372 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
 2373 if (!err) {
 2374 err = -EIO;
 2375 ext4_error(inode->i_sb,
 2376 "Directory hole detected on inode %lu\n",
 2377 inode->i_ino);
 2378 }
 2379 goto out;
 2380 }
 2361 inode->i_size = 0;
 2362 dir_block = ext4_append(handle, inode, &block);
 2363 if (IS_ERR(dir_block))
 2364 return PTR_ERR(dir_block);
2381 BUFFER_TRACE(dir_block, "get_write_access"); 2365 BUFFER_TRACE(dir_block, "get_write_access");
2382 err = ext4_journal_get_write_access(handle, dir_block); 2366 err = ext4_journal_get_write_access(handle, dir_block);
2383 if (err) 2367 if (err)
@@ -2404,25 +2388,21 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2404{ 2388{
2405 handle_t *handle; 2389 handle_t *handle;
2406 struct inode *inode; 2390 struct inode *inode;
 2407 int err, retries = 0;
 2391 int err, credits, retries = 0;
2408 2392
2409 if (EXT4_DIR_LINK_MAX(dir)) 2393 if (EXT4_DIR_LINK_MAX(dir))
2410 return -EMLINK; 2394 return -EMLINK;
2411 2395
2412 dquot_initialize(dir); 2396 dquot_initialize(dir);
2413 2397
2398 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2399 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2400 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2414retry: 2401retry:
2415 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2402 inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2416 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2403 &dentry->d_name,
2417 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2404 0, NULL, EXT4_HT_DIR, credits);
2418 if (IS_ERR(handle)) 2405 handle = ext4_journal_current_handle();
2419 return PTR_ERR(handle);
2420
2421 if (IS_DIRSYNC(dir))
2422 ext4_handle_sync(handle);
2423
2424 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
2425 &dentry->d_name, 0, NULL);
2426 err = PTR_ERR(inode); 2406 err = PTR_ERR(inode);
2427 if (IS_ERR(inode)) 2407 if (IS_ERR(inode))
2428 goto out_stop; 2408 goto out_stop;
@@ -2450,8 +2430,12 @@ out_clear_inode:
2450 goto out_clear_inode; 2430 goto out_clear_inode;
2451 unlock_new_inode(inode); 2431 unlock_new_inode(inode);
2452 d_instantiate(dentry, inode); 2432 d_instantiate(dentry, inode);
2433 if (IS_DIRSYNC(dir))
2434 ext4_handle_sync(handle);
2435
2453out_stop: 2436out_stop:
2454 ext4_journal_stop(handle); 2437 if (handle)
2438 ext4_journal_stop(handle);
2455 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2439 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2456 goto retry; 2440 goto retry;
2457 return err; 2441 return err;
@@ -2477,25 +2461,14 @@ static int empty_dir(struct inode *inode)
2477 } 2461 }
2478 2462
2479 sb = inode->i_sb; 2463 sb = inode->i_sb;
2480 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 2464 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2481 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 2465 EXT4_ERROR_INODE(inode, "invalid size");
2482 if (err)
2483 EXT4_ERROR_INODE(inode,
2484 "error %d reading directory lblock 0", err);
2485 else
2486 ext4_warning(inode->i_sb,
2487 "bad directory (dir #%lu) - no data block",
2488 inode->i_ino);
2489 return 1; 2466 return 1;
2490 } 2467 }
2491 if (!buffer_verified(bh) && 2468 bh = ext4_read_dirblock(inode, 0, EITHER);
2492 !ext4_dirent_csum_verify(inode, 2469 if (IS_ERR(bh))
2493 (struct ext4_dir_entry *)bh->b_data)) { 2470 return 1;
2494 EXT4_ERROR_INODE(inode, "checksum error reading directory " 2471
2495 "lblock 0");
2496 return -EIO;
2497 }
2498 set_buffer_verified(bh);
2499 de = (struct ext4_dir_entry_2 *) bh->b_data; 2472 de = (struct ext4_dir_entry_2 *) bh->b_data;
2500 de1 = ext4_next_entry(de, sb->s_blocksize); 2473 de1 = ext4_next_entry(de, sb->s_blocksize);
2501 if (le32_to_cpu(de->inode) != inode->i_ino || 2474 if (le32_to_cpu(de->inode) != inode->i_ino ||
@@ -2518,28 +2491,9 @@ static int empty_dir(struct inode *inode)
2518 err = 0; 2491 err = 0;
2519 brelse(bh); 2492 brelse(bh);
2520 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); 2493 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2521 bh = ext4_bread(NULL, inode, lblock, 0, &err); 2494 bh = ext4_read_dirblock(inode, lblock, EITHER);
2522 if (!bh) { 2495 if (IS_ERR(bh))
2523 if (err) 2496 return 1;
2524 EXT4_ERROR_INODE(inode,
2525 "error %d reading directory "
2526 "lblock %u", err, lblock);
2527 else
2528 ext4_warning(inode->i_sb,
2529 "bad directory (dir #%lu) - no data block",
2530 inode->i_ino);
2531
2532 offset += sb->s_blocksize;
2533 continue;
2534 }
2535 if (!buffer_verified(bh) &&
2536 !ext4_dirent_csum_verify(inode,
2537 (struct ext4_dir_entry *)bh->b_data)) {
2538 EXT4_ERROR_INODE(inode, "checksum error "
2539 "reading directory lblock 0");
2540 return -EIO;
2541 }
2542 set_buffer_verified(bh);
2543 de = (struct ext4_dir_entry_2 *) bh->b_data; 2497 de = (struct ext4_dir_entry_2 *) bh->b_data;
2544 } 2498 }
2545 if (ext4_check_dir_entry(inode, NULL, de, bh, 2499 if (ext4_check_dir_entry(inode, NULL, de, bh,
@@ -2718,25 +2672,18 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2718 struct inode *inode; 2672 struct inode *inode;
2719 struct buffer_head *bh; 2673 struct buffer_head *bh;
2720 struct ext4_dir_entry_2 *de; 2674 struct ext4_dir_entry_2 *de;
 2721 handle_t *handle;
 2675 handle_t *handle = NULL;
2722 2676
2723 /* Initialize quotas before so that eventual writes go in 2677 /* Initialize quotas before so that eventual writes go in
2724 * separate transaction */ 2678 * separate transaction */
2725 dquot_initialize(dir); 2679 dquot_initialize(dir);
2726 dquot_initialize(dentry->d_inode); 2680 dquot_initialize(dentry->d_inode);
2727 2681
2728 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2729 if (IS_ERR(handle))
2730 return PTR_ERR(handle);
2731
2732 retval = -ENOENT; 2682 retval = -ENOENT;
2733 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2683 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2734 if (!bh) 2684 if (!bh)
2735 goto end_rmdir; 2685 goto end_rmdir;
2736 2686
2737 if (IS_DIRSYNC(dir))
2738 ext4_handle_sync(handle);
2739
2740 inode = dentry->d_inode; 2687 inode = dentry->d_inode;
2741 2688
2742 retval = -EIO; 2689 retval = -EIO;
@@ -2747,6 +2694,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2747 if (!empty_dir(inode)) 2694 if (!empty_dir(inode))
2748 goto end_rmdir; 2695 goto end_rmdir;
2749 2696
2697 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2698 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2699 if (IS_ERR(handle)) {
2700 retval = PTR_ERR(handle);
2701 handle = NULL;
2702 goto end_rmdir;
2703 }
2704
2705 if (IS_DIRSYNC(dir))
2706 ext4_handle_sync(handle);
2707
2750 retval = ext4_delete_entry(handle, dir, de, bh); 2708 retval = ext4_delete_entry(handle, dir, de, bh);
2751 if (retval) 2709 if (retval)
2752 goto end_rmdir; 2710 goto end_rmdir;
@@ -2768,8 +2726,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2768 ext4_mark_inode_dirty(handle, dir); 2726 ext4_mark_inode_dirty(handle, dir);
2769 2727
2770end_rmdir: 2728end_rmdir:
2771 ext4_journal_stop(handle);
2772 brelse(bh); 2729 brelse(bh);
2730 if (handle)
2731 ext4_journal_stop(handle);
2773 return retval; 2732 return retval;
2774} 2733}
2775 2734
@@ -2779,7 +2738,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2779 struct inode *inode; 2738 struct inode *inode;
2780 struct buffer_head *bh; 2739 struct buffer_head *bh;
2781 struct ext4_dir_entry_2 *de; 2740 struct ext4_dir_entry_2 *de;
 2782 handle_t *handle;
 2741 handle_t *handle = NULL;
2783 2742
2784 trace_ext4_unlink_enter(dir, dentry); 2743 trace_ext4_unlink_enter(dir, dentry);
2785 /* Initialize quotas before so that eventual writes go 2744 /* Initialize quotas before so that eventual writes go
@@ -2787,13 +2746,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2787 dquot_initialize(dir); 2746 dquot_initialize(dir);
2788 dquot_initialize(dentry->d_inode); 2747 dquot_initialize(dentry->d_inode);
2789 2748
2790 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2791 if (IS_ERR(handle))
2792 return PTR_ERR(handle);
2793
2794 if (IS_DIRSYNC(dir))
2795 ext4_handle_sync(handle);
2796
2797 retval = -ENOENT; 2749 retval = -ENOENT;
2798 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2750 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2799 if (!bh) 2751 if (!bh)
@@ -2805,6 +2757,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2805 if (le32_to_cpu(de->inode) != inode->i_ino) 2757 if (le32_to_cpu(de->inode) != inode->i_ino)
2806 goto end_unlink; 2758 goto end_unlink;
2807 2759
2760 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2761 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2762 if (IS_ERR(handle)) {
2763 retval = PTR_ERR(handle);
2764 handle = NULL;
2765 goto end_unlink;
2766 }
2767
2768 if (IS_DIRSYNC(dir))
2769 ext4_handle_sync(handle);
2770
2808 if (!inode->i_nlink) { 2771 if (!inode->i_nlink) {
2809 ext4_warning(inode->i_sb, 2772 ext4_warning(inode->i_sb,
2810 "Deleting nonexistent file (%lu), %d", 2773 "Deleting nonexistent file (%lu), %d",
@@ -2825,8 +2788,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2825 retval = 0; 2788 retval = 0;
2826 2789
2827end_unlink: 2790end_unlink:
2828 ext4_journal_stop(handle);
2829 brelse(bh); 2791 brelse(bh);
2792 if (handle)
2793 ext4_journal_stop(handle);
2830 trace_ext4_unlink_exit(dentry, retval); 2794 trace_ext4_unlink_exit(dentry, retval);
2831 return retval; 2795 return retval;
2832} 2796}
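
Both ext4_rmdir() and ext4_unlink() now defer ext4_journal_start() until the directory entry has been found and the sanity checks have passed, so the common failure paths never open, and then immediately stop, a journal handle. The sketch below shows the resulting shape with the bookkeeping abbreviated; example_unlink() and the out label are illustrative.

/* Sketch: start the handle only once we know the directory will change. */
static int example_unlink(struct inode *dir, struct dentry *dentry)
{
	handle_t *handle = NULL;
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de;
	int retval = -ENOENT;

	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
	if (!bh)
		goto out;			/* nothing journalled yet */

	handle = ext4_journal_start(dir, EXT4_HT_DIR,
				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		handle = NULL;			/* cleanup path must skip it */
		goto out;
	}
	if (IS_DIRSYNC(dir))
		ext4_handle_sync(handle);

	retval = ext4_delete_entry(handle, dir, de, bh);
	/* ... link count and timestamp updates omitted ... */
out:
	brelse(bh);
	if (handle)
		ext4_journal_stop(handle);
	return retval;
}
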
@@ -2866,15 +2830,10 @@ static int ext4_symlink(struct inode *dir,
2866 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2830 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2867 } 2831 }
2868retry: 2832retry:
2869 handle = ext4_journal_start(dir, credits); 2833 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2870 if (IS_ERR(handle)) 2834 &dentry->d_name, 0, NULL,
2871 return PTR_ERR(handle); 2835 EXT4_HT_DIR, credits);
2872 2836 handle = ext4_journal_current_handle();
2873 if (IS_DIRSYNC(dir))
2874 ext4_handle_sync(handle);
2875
2876 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2877 &dentry->d_name, 0, NULL);
2878 err = PTR_ERR(inode); 2837 err = PTR_ERR(inode);
2879 if (IS_ERR(inode)) 2838 if (IS_ERR(inode))
2880 goto out_stop; 2839 goto out_stop;
@@ -2904,7 +2863,7 @@ retry:
2904 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS 2863 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2905 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified 2864 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2906 */ 2865 */
2907 handle = ext4_journal_start(dir, 2866 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2908 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2867 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2909 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 2868 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2910 if (IS_ERR(handle)) { 2869 if (IS_ERR(handle)) {
@@ -2927,8 +2886,12 @@ retry:
2927 } 2886 }
2928 EXT4_I(inode)->i_disksize = inode->i_size; 2887 EXT4_I(inode)->i_disksize = inode->i_size;
2929 err = ext4_add_nondir(handle, dentry, inode); 2888 err = ext4_add_nondir(handle, dentry, inode);
2889 if (!err && IS_DIRSYNC(dir))
2890 ext4_handle_sync(handle);
2891
2930out_stop: 2892out_stop:
2931 ext4_journal_stop(handle); 2893 if (handle)
2894 ext4_journal_stop(handle);
2932 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2895 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2933 goto retry; 2896 goto retry;
2934 return err; 2897 return err;
@@ -2951,8 +2914,9 @@ static int ext4_link(struct dentry *old_dentry,
2951 dquot_initialize(dir); 2914 dquot_initialize(dir);
2952 2915
2953retry: 2916retry:
2954 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2917 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2955 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2918 (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2919 EXT4_INDEX_EXTRA_TRANS_BLOCKS));
2956 if (IS_ERR(handle)) 2920 if (IS_ERR(handle))
2957 return PTR_ERR(handle); 2921 return PTR_ERR(handle);
2958 2922
@@ -2992,13 +2956,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2992 struct buffer_head *bh; 2956 struct buffer_head *bh;
2993 2957
2994 if (!ext4_has_inline_data(inode)) { 2958 if (!ext4_has_inline_data(inode)) {
2995 if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) { 2959 bh = ext4_read_dirblock(inode, 0, EITHER);
2996 if (!*retval) { 2960 if (IS_ERR(bh)) {
2997 *retval = -EIO; 2961 *retval = PTR_ERR(bh);
2998 ext4_error(inode->i_sb,
2999 "Directory hole detected on inode %lu\n",
3000 inode->i_ino);
3001 }
3002 return NULL; 2962 return NULL;
3003 } 2963 }
3004 *parent_de = ext4_next_entry( 2964 *parent_de = ext4_next_entry(
@@ -3035,9 +2995,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3035 * in separate transaction */ 2995 * in separate transaction */
3036 if (new_dentry->d_inode) 2996 if (new_dentry->d_inode)
3037 dquot_initialize(new_dentry->d_inode); 2997 dquot_initialize(new_dentry->d_inode);
3038 handle = ext4_journal_start(old_dir, 2 * 2998 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3039 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2999 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3040 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 3000 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3041 if (IS_ERR(handle)) 3001 if (IS_ERR(handle))
3042 return PTR_ERR(handle); 3002 return PTR_ERR(handle);
3043 3003
@@ -3077,11 +3037,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3077 &inlined); 3037 &inlined);
3078 if (!dir_bh) 3038 if (!dir_bh)
3079 goto end_rename; 3039 goto end_rename;
3080 if (!inlined && !buffer_verified(dir_bh) &&
3081 !ext4_dirent_csum_verify(old_inode,
3082 (struct ext4_dir_entry *)dir_bh->b_data))
3083 goto end_rename;
3084 set_buffer_verified(dir_bh);
3085 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) 3040 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
3086 goto end_rename; 3041 goto end_rename;
3087 retval = -EMLINK; 3042 retval = -EMLINK;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 0016fbca2a40..809b31003ecc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
23#include <linux/workqueue.h> 23#include <linux/workqueue.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/mm.h>
26 27
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -73,8 +74,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
73 BUG_ON(!list_empty(&io->list)); 74 BUG_ON(!list_empty(&io->list));
74 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
75 76
76 if (io->page)
77 put_page(io->page);
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
@@ -103,14 +102,13 @@ static int ext4_end_io(ext4_io_end_t *io)
103 "(inode %lu, offset %llu, size %zd, error %d)", 102 "(inode %lu, offset %llu, size %zd, error %d)",
104 inode->i_ino, offset, size, ret); 103 inode->i_ino, offset, size, ret);
105 } 104 }
106 if (io->iocb)
107 aio_complete(io->iocb, io->result, 0);
108
109 if (io->flag & EXT4_IO_END_DIRECT)
110 inode_dio_done(inode);
111 /* Wake up anyone waiting on unwritten extent conversion */ 105 /* Wake up anyone waiting on unwritten extent conversion */
112 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 106 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
113 wake_up_all(ext4_ioend_wq(inode)); 107 wake_up_all(ext4_ioend_wq(inode));
108 if (io->flag & EXT4_IO_END_DIRECT)
109 inode_dio_done(inode);
110 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0);
114 return ret; 112 return ret;
115} 113}
116 114
@@ -119,7 +117,6 @@ static void dump_completed_IO(struct inode *inode)
119#ifdef EXT4FS_DEBUG 117#ifdef EXT4FS_DEBUG
120 struct list_head *cur, *before, *after; 118 struct list_head *cur, *before, *after;
121 ext4_io_end_t *io, *io0, *io1; 119 ext4_io_end_t *io, *io0, *io1;
122 unsigned long flags;
123 120
124 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { 121 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
125 ext4_debug("inode %lu completed_io list is empty\n", 122 ext4_debug("inode %lu completed_io list is empty\n",
@@ -152,26 +149,20 @@ void ext4_add_complete_io(ext4_io_end_t *io_end)
152 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 149 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
153 150
154 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 151 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
155 if (list_empty(&ei->i_completed_io_list)) { 152 if (list_empty(&ei->i_completed_io_list))
156 io_end->flag |= EXT4_IO_END_QUEUED; 153 queue_work(wq, &ei->i_unwritten_work);
157 queue_work(wq, &io_end->work);
158 }
159 list_add_tail(&io_end->list, &ei->i_completed_io_list); 154 list_add_tail(&io_end->list, &ei->i_completed_io_list);
160 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
161} 156}
162 157
163static int ext4_do_flush_completed_IO(struct inode *inode, 158static int ext4_do_flush_completed_IO(struct inode *inode)
164 ext4_io_end_t *work_io)
165{ 159{
166 ext4_io_end_t *io; 160 ext4_io_end_t *io;
167 struct list_head unwritten, complete, to_free; 161 struct list_head unwritten;
168 unsigned long flags; 162 unsigned long flags;
169 struct ext4_inode_info *ei = EXT4_I(inode); 163 struct ext4_inode_info *ei = EXT4_I(inode);
170 int err, ret = 0; 164 int err, ret = 0;
171 165
172 INIT_LIST_HEAD(&complete);
173 INIT_LIST_HEAD(&to_free);
174
175 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 166 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
176 dump_completed_IO(inode); 167 dump_completed_IO(inode);
177 list_replace_init(&ei->i_completed_io_list, &unwritten); 168 list_replace_init(&ei->i_completed_io_list, &unwritten);
@@ -185,32 +176,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
185 err = ext4_end_io(io); 176 err = ext4_end_io(io);
186 if (unlikely(!ret && err)) 177 if (unlikely(!ret && err))
187 ret = err; 178 ret = err;
188
189 list_add_tail(&io->list, &complete);
190 }
191 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
192 while (!list_empty(&complete)) {
193 io = list_entry(complete.next, ext4_io_end_t, list);
194 io->flag &= ~EXT4_IO_END_UNWRITTEN; 179 io->flag &= ~EXT4_IO_END_UNWRITTEN;
195 /* end_io context can not be destroyed now because it still
196 * used by queued worker. Worker thread will destroy it later */
197 if (io->flag & EXT4_IO_END_QUEUED)
198 list_del_init(&io->list);
199 else
200 list_move(&io->list, &to_free);
201 }
202 /* If we are called from worker context, it is time to clear queued
203 * flag, and destroy it's end_io if it was converted already */
204 if (work_io) {
205 work_io->flag &= ~EXT4_IO_END_QUEUED;
206 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
207 list_add_tail(&work_io->list, &to_free);
208 }
209 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
210
211 while (!list_empty(&to_free)) {
212 io = list_entry(to_free.next, ext4_io_end_t, list);
213 list_del_init(&io->list);
214 ext4_free_io_end(io); 180 ext4_free_io_end(io);
215 } 181 }
216 return ret; 182 return ret;
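
Completed-I/O processing moves from one work item per io_end, with the QUEUED-flag juggling deleted above, to a single per-inode work item: ext4_add_complete_io() queues ei->i_unwritten_work only when the list goes from empty to non-empty, and the work function simply drains the list. A schematic of the drain, with the i_completed_io_lock handling abbreviated (example_flush_completed_io() is illustrative):

/* Schematic of the per-inode flush; locking around the splice abbreviated. */
static int example_flush_completed_io(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_io_end_t *io;
	LIST_HEAD(unwritten);
	int err, ret = 0;

	/* Done under ei->i_completed_io_lock in the real code. */
	list_replace_init(&ei->i_completed_io_list, &unwritten);

	while (!list_empty(&unwritten)) {
		io = list_entry(unwritten.next, ext4_io_end_t, list);
		list_del_init(&io->list);

		err = ext4_end_io(io);		/* convert unwritten extents */
		if (unlikely(!ret && err))
			ret = err;
		io->flag &= ~EXT4_IO_END_UNWRITTEN;
		ext4_free_io_end(io);		/* no queued worker owns it now */
	}
	return ret;
}
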
@@ -219,10 +185,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
219/* 185/*
220 * work on completed aio dio IO, to convert unwritten extents to extents 186 * work on completed aio dio IO, to convert unwritten extents to extents
221 */ 187 */
222static void ext4_end_io_work(struct work_struct *work) 188void ext4_end_io_work(struct work_struct *work)
223{ 189{
224 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 190 struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
225 ext4_do_flush_completed_IO(io->inode, io); 191 i_unwritten_work);
192 ext4_do_flush_completed_IO(&ei->vfs_inode);
226} 193}
227 194
228int ext4_flush_unwritten_io(struct inode *inode) 195int ext4_flush_unwritten_io(struct inode *inode)
@@ -230,7 +197,7 @@ int ext4_flush_unwritten_io(struct inode *inode)
230 int ret; 197 int ret;
231 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && 198 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
232 !(inode->i_state & I_FREEING)); 199 !(inode->i_state & I_FREEING));
233 ret = ext4_do_flush_completed_IO(inode, NULL); 200 ret = ext4_do_flush_completed_IO(inode);
234 ext4_unwritten_wait(inode); 201 ext4_unwritten_wait(inode);
235 return ret; 202 return ret;
236} 203}
@@ -241,7 +208,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
241 if (io) { 208 if (io) {
242 atomic_inc(&EXT4_I(inode)->i_ioend_count); 209 atomic_inc(&EXT4_I(inode)->i_ioend_count);
243 io->inode = inode; 210 io->inode = inode;
244 INIT_WORK(&io->work, ext4_end_io_work);
245 INIT_LIST_HEAD(&io->list); 211 INIT_LIST_HEAD(&io->list);
246 } 212 }
247 return io; 213 return io;
@@ -382,14 +348,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
382 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 348 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
383 } 349 }
384 350
385 if (!buffer_mapped(bh) || buffer_delay(bh)) {
386 if (!buffer_mapped(bh))
387 clear_buffer_dirty(bh);
388 if (io->io_bio)
389 ext4_io_submit(io);
390 return 0;
391 }
392
393 if (io->io_bio && bh->b_blocknr != io->io_next_block) { 351 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
394submit_and_retry: 352submit_and_retry:
395 ext4_io_submit(io); 353 ext4_io_submit(io);
@@ -436,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
436 394
437 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); 395 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
438 if (!io_page) { 396 if (!io_page) {
439 set_page_dirty(page); 397 redirty_page_for_writepage(wbc, page);
440 unlock_page(page); 398 unlock_page(page);
441 return -ENOMEM; 399 return -ENOMEM;
442 } 400 }
@@ -468,7 +426,15 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
468 set_buffer_uptodate(bh); 426 set_buffer_uptodate(bh);
469 continue; 427 continue;
470 } 428 }
471 clear_buffer_dirty(bh); 429 if (!buffer_dirty(bh) || buffer_delay(bh) ||
430 !buffer_mapped(bh) || buffer_unwritten(bh)) {
431 /* A hole? We can safely clear the dirty bit */
432 if (!buffer_mapped(bh))
433 clear_buffer_dirty(bh);
434 if (io->io_bio)
435 ext4_io_submit(io);
436 continue;
437 }
472 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 438 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
473 if (ret) { 439 if (ret) {
474 /* 440 /*
@@ -476,9 +442,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
476 * we can do but mark the page as dirty, and 442 * we can do but mark the page as dirty, and
477 * better luck next time. 443 * better luck next time.
478 */ 444 */
479 set_page_dirty(page); 445 redirty_page_for_writepage(wbc, page);
480 break; 446 break;
481 } 447 }
448 clear_buffer_dirty(bh);
482 } 449 }
483 unlock_page(page); 450 unlock_page(page);
484 /* 451 /*
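
ext4_bio_write_page() now decides per buffer whether to submit it: clean, delayed, unmapped or unwritten buffers are skipped (flushing any bio built so far, so the submitted bio stays block-contiguous), the dirty bit is cleared only after io_submit_add_bh() has accepted the buffer, and failures go through redirty_page_for_writepage() so writeback accounting stays consistent. The rough shape of the loop, with the surrounding page and variable setup omitted:

	/* Rough shape of the new per-buffer loop; declarations omitted. */
	bh = head = page_buffers(page);
	do {
		if (!buffer_dirty(bh) || buffer_delay(bh) ||
		    !buffer_mapped(bh) || buffer_unwritten(bh)) {
			if (!buffer_mapped(bh))
				clear_buffer_dirty(bh);	/* a hole: safe to drop */
			if (io->io_bio)
				ext4_io_submit(io);	/* keep the bio contiguous */
			continue;
		}
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			redirty_page_for_writepage(wbc, page);	/* retry later */
			break;
		}
		clear_buffer_dirty(bh);		/* only once the bh is queued */
	} while ((bh = bh->b_this_page) != head);
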
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d99387b89edd..c7f4d7584669 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -333,8 +333,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
333 int err; 333 int err;
334 334
335 bh = sb_getblk(sb, blk); 335 bh = sb_getblk(sb, blk);
336 if (!bh) 336 if (unlikely(!bh))
337 return ERR_PTR(-EIO); 337 return ERR_PTR(-ENOMEM);
338 if ((err = ext4_journal_get_write_access(handle, bh))) { 338 if ((err = ext4_journal_get_write_access(handle, bh))) {
339 brelse(bh); 339 brelse(bh);
340 bh = ERR_PTR(err); 340 bh = ERR_PTR(err);
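
sb_getblk() returns NULL only when it cannot allocate the buffer head, so the resize paths (bclean() above and the call sites below) now report that case as -ENOMEM instead of -EIO. The pattern, as a small sketch (example_get_meta_block() is illustrative):

/* Sketch of the allocation-failure handling now used in resize.c. */
static struct buffer_head *example_get_meta_block(struct super_block *sb,
						  ext4_fsblk_t blk)
{
	struct buffer_head *bh = sb_getblk(sb, blk);

	if (unlikely(!bh))		/* allocation failure, not an I/O error */
		return ERR_PTR(-ENOMEM);
	return bh;
}
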
@@ -410,8 +410,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
410 return err; 410 return err;
411 411
412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); 412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
413 if (!bh) 413 if (unlikely(!bh))
414 return -EIO; 414 return -ENOMEM;
415 415
416 err = ext4_journal_get_write_access(handle, bh); 416 err = ext4_journal_get_write_access(handle, bh);
417 if (err) 417 if (err)
@@ -466,7 +466,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); 466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
467 467
468 /* This transaction may be extended/restarted along the way */ 468 /* This transaction may be extended/restarted along the way */
469 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 469 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
470 if (IS_ERR(handle)) 470 if (IS_ERR(handle))
471 return PTR_ERR(handle); 471 return PTR_ERR(handle);
472 472
@@ -500,8 +500,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
500 goto out; 500 goto out;
501 501
502 gdb = sb_getblk(sb, block); 502 gdb = sb_getblk(sb, block);
503 if (!gdb) { 503 if (unlikely(!gdb)) {
504 err = -EIO; 504 err = -ENOMEM;
505 goto out; 505 goto out;
506 } 506 }
507 507
@@ -1031,7 +1031,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1031 handle_t *handle; 1031 handle_t *handle;
1032 int err = 0, err2; 1032 int err = 0, err2;
1033 1033
1034 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 1034 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
1035 if (IS_ERR(handle)) { 1035 if (IS_ERR(handle)) {
1036 group = 1; 1036 group = 1;
1037 err = PTR_ERR(handle); 1037 err = PTR_ERR(handle);
@@ -1064,8 +1064,8 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1064 ext4_bg_has_super(sb, group)); 1064 ext4_bg_has_super(sb, group));
1065 1065
1066 bh = sb_getblk(sb, backup_block); 1066 bh = sb_getblk(sb, backup_block);
1067 if (!bh) { 1067 if (unlikely(!bh)) {
1068 err = -EIO; 1068 err = -ENOMEM;
1069 break; 1069 break;
1070 } 1070 }
1071 ext4_debug("update metadata backup %llu(+%llu)\n", 1071 ext4_debug("update metadata backup %llu(+%llu)\n",
@@ -1168,7 +1168,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) 1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1169{ 1169{
1170 struct buffer_head *bh = sb_getblk(sb, block); 1170 struct buffer_head *bh = sb_getblk(sb, block);
1171 if (!bh) 1171 if (unlikely(!bh))
1172 return NULL; 1172 return NULL;
1173 if (!bh_uptodate_or_lock(bh)) { 1173 if (!bh_uptodate_or_lock(bh)) {
1174 if (bh_submit_read(bh) < 0) { 1174 if (bh_submit_read(bh) < 0) {
@@ -1412,7 +1412,7 @@ static int ext4_flex_group_add(struct super_block *sb,
1412 * modify each of the reserved GDT dindirect blocks. 1412 * modify each of the reserved GDT dindirect blocks.
1413 */ 1413 */
1414 credit = flex_gd->count * 4 + reserved_gdb; 1414 credit = flex_gd->count * 4 + reserved_gdb;
1415 handle = ext4_journal_start_sb(sb, credit); 1415 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
1416 if (IS_ERR(handle)) { 1416 if (IS_ERR(handle)) {
1417 err = PTR_ERR(handle); 1417 err = PTR_ERR(handle);
1418 goto exit; 1418 goto exit;
@@ -1506,10 +1506,12 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
1506 group_data[i].blocks_count = blocks_per_group; 1506 group_data[i].blocks_count = blocks_per_group;
1507 overhead = ext4_group_overhead_blocks(sb, group + i); 1507 overhead = ext4_group_overhead_blocks(sb, group + i);
1508 group_data[i].free_blocks_count = blocks_per_group - overhead; 1508 group_data[i].free_blocks_count = blocks_per_group - overhead;
1509 if (ext4_has_group_desc_csum(sb)) 1509 if (ext4_has_group_desc_csum(sb)) {
1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1511 EXT4_BG_INODE_UNINIT; 1511 EXT4_BG_INODE_UNINIT;
1512 else 1512 if (!test_opt(sb, INIT_INODE_TABLE))
1513 flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
1514 } else
1513 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; 1515 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1514 } 1516 }
1515 1517
@@ -1594,7 +1596,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1594 1596
1595 err = ext4_alloc_flex_bg_array(sb, input->group + 1); 1597 err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1596 if (err) 1598 if (err)
1597 return err; 1599 goto out;
1598 1600
1599 err = ext4_mb_alloc_groupinfo(sb, input->group + 1); 1601 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1600 if (err) 1602 if (err)
@@ -1622,7 +1624,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
1622 /* We will update the superblock, one block bitmap, and 1624 /* We will update the superblock, one block bitmap, and
1623 * one group descriptor via ext4_group_add_blocks(). 1625 * one group descriptor via ext4_group_add_blocks().
1624 */ 1626 */
1625 handle = ext4_journal_start_sb(sb, 3); 1627 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
1626 if (IS_ERR(handle)) { 1628 if (IS_ERR(handle)) {
1627 err = PTR_ERR(handle); 1629 err = PTR_ERR(handle);
1628 ext4_warning(sb, "error %d on journal start", err); 1630 ext4_warning(sb, "error %d on journal start", err);
@@ -1786,7 +1788,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1786 credits += 3; /* block bitmap, bg descriptor, resize inode */ 1788 credits += 3; /* block bitmap, bg descriptor, resize inode */
1787 } 1789 }
1788 1790
1789 handle = ext4_journal_start_sb(sb, credits); 1791 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
1790 if (IS_ERR(handle)) 1792 if (IS_ERR(handle))
1791 return PTR_ERR(handle); 1793 return PTR_ERR(handle);
1792 1794
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3d4fb81bacd5..620cf5615ba2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -69,8 +69,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
69static void ext4_clear_journal_err(struct super_block *sb, 69static void ext4_clear_journal_err(struct super_block *sb,
70 struct ext4_super_block *es); 70 struct ext4_super_block *es);
71static int ext4_sync_fs(struct super_block *sb, int wait); 71static int ext4_sync_fs(struct super_block *sb, int wait);
72static const char *ext4_decode_error(struct super_block *sb, int errno,
73 char nbuf[16]);
74static int ext4_remount(struct super_block *sb, int *flags, char *data); 72static int ext4_remount(struct super_block *sb, int *flags, char *data);
75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 73static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
76static int ext4_unfreeze(struct super_block *sb); 74static int ext4_unfreeze(struct super_block *sb);
@@ -296,107 +294,6 @@ void ext4_itable_unused_set(struct super_block *sb,
296} 294}
297 295
298 296
299/* Just increment the non-pointer handle value */
300static handle_t *ext4_get_nojournal(void)
301{
302 handle_t *handle = current->journal_info;
303 unsigned long ref_cnt = (unsigned long)handle;
304
305 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
306
307 ref_cnt++;
308 handle = (handle_t *)ref_cnt;
309
310 current->journal_info = handle;
311 return handle;
312}
313
314
315/* Decrement the non-pointer handle value */
316static void ext4_put_nojournal(handle_t *handle)
317{
318 unsigned long ref_cnt = (unsigned long)handle;
319
320 BUG_ON(ref_cnt == 0);
321
322 ref_cnt--;
323 handle = (handle_t *)ref_cnt;
324
325 current->journal_info = handle;
326}
327
328/*
329 * Wrappers for jbd2_journal_start/end.
330 */
331handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
332{
333 journal_t *journal;
334
335 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
336 if (sb->s_flags & MS_RDONLY)
337 return ERR_PTR(-EROFS);
338
339 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
340 journal = EXT4_SB(sb)->s_journal;
341 if (!journal)
342 return ext4_get_nojournal();
343 /*
344 * Special case here: if the journal has aborted behind our
345 * backs (eg. EIO in the commit thread), then we still need to
346 * take the FS itself readonly cleanly.
347 */
348 if (is_journal_aborted(journal)) {
349 ext4_abort(sb, "Detected aborted journal");
350 return ERR_PTR(-EROFS);
351 }
352 return jbd2_journal_start(journal, nblocks);
353}
354
355int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
356{
357 struct super_block *sb;
358 int err;
359 int rc;
360
361 if (!ext4_handle_valid(handle)) {
362 ext4_put_nojournal(handle);
363 return 0;
364 }
365 sb = handle->h_transaction->t_journal->j_private;
366 err = handle->h_err;
367 rc = jbd2_journal_stop(handle);
368
369 if (!err)
370 err = rc;
371 if (err)
372 __ext4_std_error(sb, where, line, err);
373 return err;
374}
375
376void ext4_journal_abort_handle(const char *caller, unsigned int line,
377 const char *err_fn, struct buffer_head *bh,
378 handle_t *handle, int err)
379{
380 char nbuf[16];
381 const char *errstr = ext4_decode_error(NULL, err, nbuf);
382
383 BUG_ON(!ext4_handle_valid(handle));
384
385 if (bh)
386 BUFFER_TRACE(bh, "abort");
387
388 if (!handle->h_err)
389 handle->h_err = err;
390
391 if (is_handle_aborted(handle))
392 return;
393
394 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
395 caller, line, errstr, err_fn);
396
397 jbd2_journal_abort_handle(handle);
398}
399
400static void __save_error_info(struct super_block *sb, const char *func, 297static void __save_error_info(struct super_block *sb, const char *func,
401 unsigned int line) 298 unsigned int line)
402{ 299{
@@ -553,7 +450,7 @@ void ext4_error_file(struct file *file, const char *function,
553 va_list args; 450 va_list args;
554 struct va_format vaf; 451 struct va_format vaf;
555 struct ext4_super_block *es; 452 struct ext4_super_block *es;
 556 struct inode *inode = file->f_dentry->d_inode;
 453 struct inode *inode = file_inode(file);
557 char pathname[80], *path; 454 char pathname[80], *path;
558 455
559 es = EXT4_SB(inode->i_sb)->s_es; 456 es = EXT4_SB(inode->i_sb)->s_es;
@@ -582,8 +479,8 @@ void ext4_error_file(struct file *file, const char *function,
582 ext4_handle_error(inode->i_sb); 479 ext4_handle_error(inode->i_sb);
583} 480}
584 481
585static const char *ext4_decode_error(struct super_block *sb, int errno, 482const char *ext4_decode_error(struct super_block *sb, int errno,
586 char nbuf[16]) 483 char nbuf[16])
587{ 484{
588 char *errstr = NULL; 485 char *errstr = NULL;
589 486
@@ -858,6 +755,7 @@ static void ext4_put_super(struct super_block *sb)
858 ext4_abort(sb, "Couldn't clean up the journal"); 755 ext4_abort(sb, "Couldn't clean up the journal");
859 } 756 }
860 757
758 ext4_es_unregister_shrinker(sb);
861 del_timer(&sbi->s_err_report); 759 del_timer(&sbi->s_err_report);
862 ext4_release_system_zone(sb); 760 ext4_release_system_zone(sb);
863 ext4_mb_release(sb); 761 ext4_mb_release(sb);
@@ -939,11 +837,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
939 return NULL; 837 return NULL;
940 838
941 ei->vfs_inode.i_version = 1; 839 ei->vfs_inode.i_version = 1;
942 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
943 INIT_LIST_HEAD(&ei->i_prealloc_list); 840 INIT_LIST_HEAD(&ei->i_prealloc_list);
944 spin_lock_init(&ei->i_prealloc_lock); 841 spin_lock_init(&ei->i_prealloc_lock);
945 ext4_es_init_tree(&ei->i_es_tree); 842 ext4_es_init_tree(&ei->i_es_tree);
946 rwlock_init(&ei->i_es_lock); 843 rwlock_init(&ei->i_es_lock);
844 INIT_LIST_HEAD(&ei->i_es_lru);
845 ei->i_es_lru_nr = 0;
947 ei->i_reserved_data_blocks = 0; 846 ei->i_reserved_data_blocks = 0;
948 ei->i_reserved_meta_blocks = 0; 847 ei->i_reserved_meta_blocks = 0;
949 ei->i_allocated_meta_blocks = 0; 848 ei->i_allocated_meta_blocks = 0;
@@ -960,6 +859,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
960 ei->i_datasync_tid = 0; 859 ei->i_datasync_tid = 0;
961 atomic_set(&ei->i_ioend_count, 0); 860 atomic_set(&ei->i_ioend_count, 0);
962 atomic_set(&ei->i_unwritten, 0); 861 atomic_set(&ei->i_unwritten, 0);
862 INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work);
963 863
964 return &ei->vfs_inode; 864 return &ei->vfs_inode;
965} 865}
@@ -1031,6 +931,7 @@ void ext4_clear_inode(struct inode *inode)
1031 dquot_drop(inode); 931 dquot_drop(inode);
1032 ext4_discard_preallocations(inode); 932 ext4_discard_preallocations(inode);
1033 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 933 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
934 ext4_es_lru_del(inode);
1034 if (EXT4_I(inode)->jinode) { 935 if (EXT4_I(inode)->jinode) {
1035 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 936 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1036 EXT4_I(inode)->jinode); 937 EXT4_I(inode)->jinode);
@@ -1280,8 +1181,8 @@ static const match_table_t tokens = {
1280 {Opt_stripe, "stripe=%u"}, 1181 {Opt_stripe, "stripe=%u"},
1281 {Opt_delalloc, "delalloc"}, 1182 {Opt_delalloc, "delalloc"},
1282 {Opt_nodelalloc, "nodelalloc"}, 1183 {Opt_nodelalloc, "nodelalloc"},
1283 {Opt_mblk_io_submit, "mblk_io_submit"}, 1184 {Opt_removed, "mblk_io_submit"},
1284 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1185 {Opt_removed, "nomblk_io_submit"},
1285 {Opt_block_validity, "block_validity"}, 1186 {Opt_block_validity, "block_validity"},
1286 {Opt_noblock_validity, "noblock_validity"}, 1187 {Opt_noblock_validity, "noblock_validity"},
1287 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1188 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1337,6 +1238,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1337{ 1238{
1338 struct ext4_sb_info *sbi = EXT4_SB(sb); 1239 struct ext4_sb_info *sbi = EXT4_SB(sb);
1339 char *qname; 1240 char *qname;
1241 int ret = -1;
1340 1242
1341 if (sb_any_quota_loaded(sb) && 1243 if (sb_any_quota_loaded(sb) &&
1342 !sbi->s_qf_names[qtype]) { 1244 !sbi->s_qf_names[qtype]) {
@@ -1351,23 +1253,26 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1351 "Not enough memory for storing quotafile name"); 1253 "Not enough memory for storing quotafile name");
1352 return -1; 1254 return -1;
1353 } 1255 }
1354 if (sbi->s_qf_names[qtype] && 1256 if (sbi->s_qf_names[qtype]) {
1355 strcmp(sbi->s_qf_names[qtype], qname)) { 1257 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1356 ext4_msg(sb, KERN_ERR, 1258 ret = 1;
1357 "%s quota file already specified", QTYPE2NAME(qtype)); 1259 else
1358 kfree(qname); 1260 ext4_msg(sb, KERN_ERR,
1359 return -1; 1261 "%s quota file already specified",
1262 QTYPE2NAME(qtype));
1263 goto errout;
1360 } 1264 }
1361 sbi->s_qf_names[qtype] = qname; 1265 if (strchr(qname, '/')) {
1362 if (strchr(sbi->s_qf_names[qtype], '/')) {
1363 ext4_msg(sb, KERN_ERR, 1266 ext4_msg(sb, KERN_ERR,
1364 "quotafile must be on filesystem root"); 1267 "quotafile must be on filesystem root");
1365 kfree(sbi->s_qf_names[qtype]); 1268 goto errout;
1366 sbi->s_qf_names[qtype] = NULL;
1367 return -1;
1368 } 1269 }
1270 sbi->s_qf_names[qtype] = qname;
1369 set_opt(sb, QUOTA); 1271 set_opt(sb, QUOTA);
1370 return 1; 1272 return 1;
1273errout:
1274 kfree(qname);
1275 return ret;
1371} 1276}
1372 1277
1373static int clear_qf_name(struct super_block *sb, int qtype) 1278static int clear_qf_name(struct super_block *sb, int qtype)
@@ -1381,10 +1286,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1381 " when quota turned on"); 1286 " when quota turned on");
1382 return -1; 1287 return -1;
1383 } 1288 }
1384 /* 1289 kfree(sbi->s_qf_names[qtype]);
1385 * The space will be released later when all options are confirmed
1386 * to be correct
1387 */
1388 sbi->s_qf_names[qtype] = NULL; 1290 sbi->s_qf_names[qtype] = NULL;
1389 return 1; 1291 return 1;
1390} 1292}
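
set_qf_name() above is reworked so that every exit which has already duplicated the name funnels through one errout label: naming the same quota file twice is now accepted (returns 1), a conflicting name or one containing '/' frees the copy and fails, and sbi->s_qf_names[qtype] is assigned only after validation. The control flow, reduced to a skeleton (example_set_qf_name() is illustrative and assumes qname was already kstrdup()'d by the caller, as in the real code):

/* Skeleton of the validation flow; error messages omitted. */
static int example_set_qf_name(struct ext4_sb_info *sbi, int qtype, char *qname)
{
	int ret = -1;

	if (sbi->s_qf_names[qtype]) {
		if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
			ret = 1;	/* same name twice is not an error */
		goto errout;		/* a different name is */
	}
	if (strchr(qname, '/'))
		goto errout;		/* quota file must sit on the fs root */

	sbi->s_qf_names[qtype] = qname;	/* keep the copy only after validation */
	return 1;
errout:
	kfree(qname);
	return ret;
}
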
@@ -1404,6 +1306,9 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1404#define MOPT_QFMT MOPT_NOSUPPORT 1306#define MOPT_QFMT MOPT_NOSUPPORT
1405#endif 1307#endif
1406#define MOPT_DATAJ 0x0080 1308#define MOPT_DATAJ 0x0080
1309#define MOPT_NO_EXT2 0x0100
1310#define MOPT_NO_EXT3 0x0200
1311#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1407 1312
1408static const struct mount_opts { 1313static const struct mount_opts {
1409 int token; 1314 int token;
@@ -1414,25 +1319,31 @@ static const struct mount_opts {
1414 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1319 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1415 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1320 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1416 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1321 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1417 {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
1418 {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
1419 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1322 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1420 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1323 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1421 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, 1324 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1422 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, 1325 MOPT_EXT4_ONLY | MOPT_SET},
1326 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1327 MOPT_EXT4_ONLY | MOPT_CLEAR},
1423 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1328 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1424 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1329 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1425 {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, 1330 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1426 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, 1331 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1427 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, 1332 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1333 MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
1334 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1335 MOPT_EXT4_ONLY | MOPT_SET},
1428 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1336 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1429 EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, 1337 EXT4_MOUNT_JOURNAL_CHECKSUM),
1430 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, 1338 MOPT_EXT4_ONLY | MOPT_SET},
1339 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1431 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1340 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1432 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1341 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1433 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1342 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1434 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, 1343 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1435 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, 1344 MOPT_NO_EXT2 | MOPT_SET},
1345 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1346 MOPT_NO_EXT2 | MOPT_CLEAR},
1436 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1347 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1437 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1348 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1438 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1349 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1444,9 +1355,14 @@ static const struct mount_opts {
1444 {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1355 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1445 {Opt_init_itable, 0, MOPT_GTE0}, 1356 {Opt_init_itable, 0, MOPT_GTE0},
1446 {Opt_stripe, 0, MOPT_GTE0}, 1357 {Opt_stripe, 0, MOPT_GTE0},
1447 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1358 {Opt_resuid, 0, MOPT_GTE0},
1448 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1359 {Opt_resgid, 0, MOPT_GTE0},
1449 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1360 {Opt_journal_dev, 0, MOPT_GTE0},
1361 {Opt_journal_ioprio, 0, MOPT_GTE0},
1362 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1363 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1364 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1365 MOPT_NO_EXT2 | MOPT_DATAJ},
1450 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1366 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1451 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1367 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1452#ifdef CONFIG_EXT4_FS_POSIX_ACL 1368#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1496,8 +1412,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1496 else if (token == Opt_offgrpjquota) 1412 else if (token == Opt_offgrpjquota)
1497 return clear_qf_name(sb, GRPQUOTA); 1413 return clear_qf_name(sb, GRPQUOTA);
1498#endif 1414#endif
1499 if (args->from && match_int(args, &arg))
1500 return -1;
1501 switch (token) { 1415 switch (token) {
1502 case Opt_noacl: 1416 case Opt_noacl:
1503 case Opt_nouser_xattr: 1417 case Opt_nouser_xattr:
@@ -1506,138 +1420,149 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1506 case Opt_sb: 1420 case Opt_sb:
1507 return 1; /* handled by get_sb_block() */ 1421 return 1; /* handled by get_sb_block() */
1508 case Opt_removed: 1422 case Opt_removed:
1509 ext4_msg(sb, KERN_WARNING, 1423 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1510 "Ignoring removed %s option", opt); 1424 return 1;
1425 case Opt_abort:
1426 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1427 return 1;
1428 case Opt_i_version:
1429 sb->s_flags |= MS_I_VERSION;
1511 return 1; 1430 return 1;
1512 case Opt_resuid: 1431 }
1432
1433 for (m = ext4_mount_opts; m->token != Opt_err; m++)
1434 if (token == m->token)
1435 break;
1436
1437 if (m->token == Opt_err) {
1438 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1439 "or missing value", opt);
1440 return -1;
1441 }
1442
1443 if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1444 ext4_msg(sb, KERN_ERR,
1445 "Mount option \"%s\" incompatible with ext2", opt);
1446 return -1;
1447 }
1448 if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1449 ext4_msg(sb, KERN_ERR,
1450 "Mount option \"%s\" incompatible with ext3", opt);
1451 return -1;
1452 }
1453
1454 if (args->from && match_int(args, &arg))
1455 return -1;
1456 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1457 return -1;
1458 if (m->flags & MOPT_EXPLICIT)
1459 set_opt2(sb, EXPLICIT_DELALLOC);
1460 if (m->flags & MOPT_CLEAR_ERR)
1461 clear_opt(sb, ERRORS_MASK);
1462 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1463 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1464 "options when quota turned on");
1465 return -1;
1466 }
1467
1468 if (m->flags & MOPT_NOSUPPORT) {
1469 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1470 } else if (token == Opt_commit) {
1471 if (arg == 0)
1472 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1473 sbi->s_commit_interval = HZ * arg;
1474 } else if (token == Opt_max_batch_time) {
1475 if (arg == 0)
1476 arg = EXT4_DEF_MAX_BATCH_TIME;
1477 sbi->s_max_batch_time = arg;
1478 } else if (token == Opt_min_batch_time) {
1479 sbi->s_min_batch_time = arg;
1480 } else if (token == Opt_inode_readahead_blks) {
1481 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1482 ext4_msg(sb, KERN_ERR,
1483 "EXT4-fs: inode_readahead_blks must be "
1484 "0 or a power of 2 smaller than 2^31");
1485 return -1;
1486 }
1487 sbi->s_inode_readahead_blks = arg;
1488 } else if (token == Opt_init_itable) {
1489 set_opt(sb, INIT_INODE_TABLE);
1490 if (!args->from)
1491 arg = EXT4_DEF_LI_WAIT_MULT;
1492 sbi->s_li_wait_mult = arg;
1493 } else if (token == Opt_max_dir_size_kb) {
1494 sbi->s_max_dir_size_kb = arg;
1495 } else if (token == Opt_stripe) {
1496 sbi->s_stripe = arg;
1497 } else if (token == Opt_resuid) {
1513 uid = make_kuid(current_user_ns(), arg); 1498 uid = make_kuid(current_user_ns(), arg);
1514 if (!uid_valid(uid)) { 1499 if (!uid_valid(uid)) {
1515 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); 1500 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1516 return -1; 1501 return -1;
1517 } 1502 }
1518 sbi->s_resuid = uid; 1503 sbi->s_resuid = uid;
1519 return 1; 1504 } else if (token == Opt_resgid) {
1520 case Opt_resgid:
1521 gid = make_kgid(current_user_ns(), arg); 1505 gid = make_kgid(current_user_ns(), arg);
1522 if (!gid_valid(gid)) { 1506 if (!gid_valid(gid)) {
1523 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); 1507 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1524 return -1; 1508 return -1;
1525 } 1509 }
1526 sbi->s_resgid = gid; 1510 sbi->s_resgid = gid;
1527 return 1; 1511 } else if (token == Opt_journal_dev) {
1528 case Opt_abort:
1529 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1530 return 1;
1531 case Opt_i_version:
1532 sb->s_flags |= MS_I_VERSION;
1533 return 1;
1534 case Opt_journal_dev:
1535 if (is_remount) { 1512 if (is_remount) {
1536 ext4_msg(sb, KERN_ERR, 1513 ext4_msg(sb, KERN_ERR,
1537 "Cannot specify journal on remount"); 1514 "Cannot specify journal on remount");
1538 return -1; 1515 return -1;
1539 } 1516 }
1540 *journal_devnum = arg; 1517 *journal_devnum = arg;
1541 return 1; 1518 } else if (token == Opt_journal_ioprio) {
1542 case Opt_journal_ioprio: 1519 if (arg > 7) {
1543 if (arg < 0 || arg > 7) 1520 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1544 return -1; 1521 " (must be 0-7)");
1545 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1546 return 1;
1547 }
1548
1549 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1550 if (token != m->token)
1551 continue;
1552 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1553 return -1;
1554 if (m->flags & MOPT_EXPLICIT)
1555 set_opt2(sb, EXPLICIT_DELALLOC);
1556 if (m->flags & MOPT_CLEAR_ERR)
1557 clear_opt(sb, ERRORS_MASK);
1558 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1559 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1560 "options when quota turned on");
1561 return -1; 1522 return -1;
1562 } 1523 }
1563 1524 *journal_ioprio =
1564 if (m->flags & MOPT_NOSUPPORT) { 1525 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1565 ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1526 } else if (m->flags & MOPT_DATAJ) {
1566 } else if (token == Opt_commit) { 1527 if (is_remount) {
1567 if (arg == 0) 1528 if (!sbi->s_journal)
1568 arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1529 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1569 sbi->s_commit_interval = HZ * arg; 1530 else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1570 } else if (token == Opt_max_batch_time) {
1571 if (arg == 0)
1572 arg = EXT4_DEF_MAX_BATCH_TIME;
1573 sbi->s_max_batch_time = arg;
1574 } else if (token == Opt_min_batch_time) {
1575 sbi->s_min_batch_time = arg;
1576 } else if (token == Opt_inode_readahead_blks) {
1577 if (arg > (1 << 30))
1578 return -1;
1579 if (arg && !is_power_of_2(arg)) {
1580 ext4_msg(sb, KERN_ERR, 1531 ext4_msg(sb, KERN_ERR,
1581 "EXT4-fs: inode_readahead_blks"
1582 " must be a power of 2");
1583 return -1;
1584 }
1585 sbi->s_inode_readahead_blks = arg;
1586 } else if (token == Opt_init_itable) {
1587 set_opt(sb, INIT_INODE_TABLE);
1588 if (!args->from)
1589 arg = EXT4_DEF_LI_WAIT_MULT;
1590 sbi->s_li_wait_mult = arg;
1591 } else if (token == Opt_max_dir_size_kb) {
1592 sbi->s_max_dir_size_kb = arg;
1593 } else if (token == Opt_stripe) {
1594 sbi->s_stripe = arg;
1595 } else if (m->flags & MOPT_DATAJ) {
1596 if (is_remount) {
1597 if (!sbi->s_journal)
1598 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1599 else if (test_opt(sb, DATA_FLAGS) !=
1600 m->mount_opt) {
1601 ext4_msg(sb, KERN_ERR,
1602 "Cannot change data mode on remount"); 1532 "Cannot change data mode on remount");
1603 return -1;
1604 }
1605 } else {
1606 clear_opt(sb, DATA_FLAGS);
1607 sbi->s_mount_opt |= m->mount_opt;
1608 }
1609#ifdef CONFIG_QUOTA
1610 } else if (m->flags & MOPT_QFMT) {
1611 if (sb_any_quota_loaded(sb) &&
1612 sbi->s_jquota_fmt != m->mount_opt) {
1613 ext4_msg(sb, KERN_ERR, "Cannot "
1614 "change journaled quota options "
1615 "when quota turned on");
1616 return -1; 1533 return -1;
1617 } 1534 }
1618 sbi->s_jquota_fmt = m->mount_opt;
1619#endif
1620 } else { 1535 } else {
1621 if (!args->from) 1536 clear_opt(sb, DATA_FLAGS);
1622 arg = 1; 1537 sbi->s_mount_opt |= m->mount_opt;
1623 if (m->flags & MOPT_CLEAR)
1624 arg = !arg;
1625 else if (unlikely(!(m->flags & MOPT_SET))) {
1626 ext4_msg(sb, KERN_WARNING,
1627 "buggy handling of option %s", opt);
1628 WARN_ON(1);
1629 return -1;
1630 }
1631 if (arg != 0)
1632 sbi->s_mount_opt |= m->mount_opt;
1633 else
1634 sbi->s_mount_opt &= ~m->mount_opt;
1635 } 1538 }
1636 return 1; 1539#ifdef CONFIG_QUOTA
1540 } else if (m->flags & MOPT_QFMT) {
1541 if (sb_any_quota_loaded(sb) &&
1542 sbi->s_jquota_fmt != m->mount_opt) {
1543 ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1544 "quota options when quota turned on");
1545 return -1;
1546 }
1547 sbi->s_jquota_fmt = m->mount_opt;
1548#endif
1549 } else {
1550 if (!args->from)
1551 arg = 1;
1552 if (m->flags & MOPT_CLEAR)
1553 arg = !arg;
1554 else if (unlikely(!(m->flags & MOPT_SET))) {
1555 ext4_msg(sb, KERN_WARNING,
1556 "buggy handling of option %s", opt);
1557 WARN_ON(1);
1558 return -1;
1559 }
1560 if (arg != 0)
1561 sbi->s_mount_opt |= m->mount_opt;
1562 else
1563 sbi->s_mount_opt &= ~m->mount_opt;
1637 } 1564 }
1638 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1565 return 1;
1639 "or missing value", opt);
1640 return -1;
1641} 1566}
1642 1567
1643static int parse_options(char *options, struct super_block *sb, 1568static int parse_options(char *options, struct super_block *sb,
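
The rewritten handle_mount_opt() above funnels almost every token through the ext4_mount_opts[] table: unknown tokens fall out of the lookup, MOPT_NO_EXT2/MOPT_NO_EXT3 reject options that make no sense when the filesystem is mounted in ext2/ext3 compatibility mode, and MOPT_SET/MOPT_CLEAR flip the corresponding mount flag. A self-contained user-space sketch of that table-driven handling follows; the MOPT_* names echo the diff, everything else is illustrative rather than the kernel code.

#include <stdio.h>
#include <string.h>

#define MOPT_SET	0x01
#define MOPT_CLEAR	0x02
#define MOPT_NO_EXT2	0x04

struct mount_opt {
	const char *name;
	unsigned int bit;	/* mount flag controlled by this option */
	unsigned int flags;	/* MOPT_* behaviour flags */
};

static const struct mount_opt mount_opts[] = {
	{ "discard",	0x01, MOPT_SET },
	{ "nodiscard",	0x01, MOPT_CLEAR },
	{ "noload",	0x02, MOPT_NO_EXT2 | MOPT_SET },
	{ NULL, 0, 0 }
};

static int handle_opt(const char *opt, int is_ext2, unsigned int *mount_flags)
{
	const struct mount_opt *m;

	for (m = mount_opts; m->name; m++)
		if (strcmp(opt, m->name) == 0)
			break;
	if (!m->name) {
		fprintf(stderr, "Unrecognized mount option \"%s\"\n", opt);
		return -1;
	}
	if ((m->flags & MOPT_NO_EXT2) && is_ext2) {
		fprintf(stderr, "Mount option \"%s\" incompatible with ext2\n", opt);
		return -1;
	}
	if (m->flags & MOPT_CLEAR)
		*mount_flags &= ~m->bit;
	else
		*mount_flags |= m->bit;
	return 1;
}

int main(void)
{
	unsigned int flags = 0;

	handle_opt("discard", 0, &flags);	/* sets bit 0x01 */
	handle_opt("noload", 1, &flags);	/* rejected in ext2 mode */
	printf("flags=%#x\n", flags);
	return 0;
}
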
@@ -2776,7 +2701,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2776 break; 2701 break;
2777 } 2702 }
2778 2703
2779 if (group == ngroups) 2704 if (group >= ngroups)
2780 ret = 1; 2705 ret = 1;
2781 2706
2782 if (!ret) { 2707 if (!ret) {
@@ -3016,33 +2941,34 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3016 return elr; 2941 return elr;
3017} 2942}
3018 2943
3019static int ext4_register_li_request(struct super_block *sb, 2944int ext4_register_li_request(struct super_block *sb,
3020 ext4_group_t first_not_zeroed) 2945 ext4_group_t first_not_zeroed)
3021{ 2946{
3022 struct ext4_sb_info *sbi = EXT4_SB(sb); 2947 struct ext4_sb_info *sbi = EXT4_SB(sb);
3023 struct ext4_li_request *elr; 2948 struct ext4_li_request *elr = NULL;
3024 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2949 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3025 int ret = 0; 2950 int ret = 0;
3026 2951
2952 mutex_lock(&ext4_li_mtx);
3027 if (sbi->s_li_request != NULL) { 2953 if (sbi->s_li_request != NULL) {
3028 /* 2954 /*
3029 * Reset timeout so it can be computed again, because 2955 * Reset timeout so it can be computed again, because
3030 * s_li_wait_mult might have changed. 2956 * s_li_wait_mult might have changed.
3031 */ 2957 */
3032 sbi->s_li_request->lr_timeout = 0; 2958 sbi->s_li_request->lr_timeout = 0;
3033 return 0; 2959 goto out;
3034 } 2960 }
3035 2961
3036 if (first_not_zeroed == ngroups || 2962 if (first_not_zeroed == ngroups ||
3037 (sb->s_flags & MS_RDONLY) || 2963 (sb->s_flags & MS_RDONLY) ||
3038 !test_opt(sb, INIT_INODE_TABLE)) 2964 !test_opt(sb, INIT_INODE_TABLE))
3039 return 0; 2965 goto out;
3040 2966
3041 elr = ext4_li_request_new(sb, first_not_zeroed); 2967 elr = ext4_li_request_new(sb, first_not_zeroed);
3042 if (!elr) 2968 if (!elr) {
3043 return -ENOMEM; 2969 ret = -ENOMEM;
3044 2970 goto out;
3045 mutex_lock(&ext4_li_mtx); 2971 }
3046 2972
3047 if (NULL == ext4_li_info) { 2973 if (NULL == ext4_li_info) {
3048 ret = ext4_li_info_new(); 2974 ret = ext4_li_info_new();
@@ -3379,7 +3305,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3379#ifdef CONFIG_EXT4_FS_POSIX_ACL 3305#ifdef CONFIG_EXT4_FS_POSIX_ACL
3380 set_opt(sb, POSIX_ACL); 3306 set_opt(sb, POSIX_ACL);
3381#endif 3307#endif
3382 set_opt(sb, MBLK_IO_SUBMIT);
3383 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3308 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3384 set_opt(sb, JOURNAL_DATA); 3309 set_opt(sb, JOURNAL_DATA);
3385 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3310 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3772,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3772 sbi->s_max_writeback_mb_bump = 128; 3697 sbi->s_max_writeback_mb_bump = 128;
3773 sbi->s_extent_max_zeroout_kb = 32; 3698 sbi->s_extent_max_zeroout_kb = 32;
3774 3699
3700 /* Register extent status tree shrinker */
3701 ext4_es_register_shrinker(sb);
3702
3775 /* 3703 /*
3776 * set up enough so that it can read an inode 3704 * set up enough so that it can read an inode
3777 */ 3705 */
@@ -4008,7 +3936,7 @@ no_journal:
4008 !(sb->s_flags & MS_RDONLY)) { 3936 !(sb->s_flags & MS_RDONLY)) {
4009 err = ext4_enable_quotas(sb); 3937 err = ext4_enable_quotas(sb);
4010 if (err) 3938 if (err)
4011 goto failed_mount7; 3939 goto failed_mount8;
4012 } 3940 }
4013#endif /* CONFIG_QUOTA */ 3941#endif /* CONFIG_QUOTA */
4014 3942
@@ -4035,6 +3963,10 @@ cantfind_ext4:
4035 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3963 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4036 goto failed_mount; 3964 goto failed_mount;
4037 3965
3966#ifdef CONFIG_QUOTA
3967failed_mount8:
3968 kobject_del(&sbi->s_kobj);
3969#endif
4038failed_mount7: 3970failed_mount7:
4039 ext4_unregister_li_request(sb); 3971 ext4_unregister_li_request(sb);
4040failed_mount6: 3972failed_mount6:
@@ -4476,16 +4408,12 @@ static void ext4_clear_journal_err(struct super_block *sb,
4476int ext4_force_commit(struct super_block *sb) 4408int ext4_force_commit(struct super_block *sb)
4477{ 4409{
4478 journal_t *journal; 4410 journal_t *journal;
4479 int ret = 0;
4480 4411
4481 if (sb->s_flags & MS_RDONLY) 4412 if (sb->s_flags & MS_RDONLY)
4482 return 0; 4413 return 0;
4483 4414
4484 journal = EXT4_SB(sb)->s_journal; 4415 journal = EXT4_SB(sb)->s_journal;
4485 if (journal) 4416 return ext4_journal_force_commit(journal);
4486 ret = ext4_journal_force_commit(journal);
4487
4488 return ret;
4489} 4417}
4490 4418
4491static int ext4_sync_fs(struct super_block *sb, int wait) 4419static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -4588,7 +4516,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4588 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4516 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4589 int err = 0; 4517 int err = 0;
4590#ifdef CONFIG_QUOTA 4518#ifdef CONFIG_QUOTA
4591 int i; 4519 int i, j;
4592#endif 4520#endif
4593 char *orig_data = kstrdup(data, GFP_KERNEL); 4521 char *orig_data = kstrdup(data, GFP_KERNEL);
4594 4522
@@ -4604,7 +4532,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4604#ifdef CONFIG_QUOTA 4532#ifdef CONFIG_QUOTA
4605 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4533 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4606 for (i = 0; i < MAXQUOTAS; i++) 4534 for (i = 0; i < MAXQUOTAS; i++)
4607 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 4535 if (sbi->s_qf_names[i]) {
4536 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4537 GFP_KERNEL);
4538 if (!old_opts.s_qf_names[i]) {
4539 for (j = 0; j < i; j++)
4540 kfree(old_opts.s_qf_names[j]);
4541 return -ENOMEM;
4542 }
4543 } else
4544 old_opts.s_qf_names[i] = NULL;
4608#endif 4545#endif
4609 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4546 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4610 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4547 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
@@ -4737,9 +4674,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4737#ifdef CONFIG_QUOTA 4674#ifdef CONFIG_QUOTA
4738 /* Release old quota file names */ 4675 /* Release old quota file names */
4739 for (i = 0; i < MAXQUOTAS; i++) 4676 for (i = 0; i < MAXQUOTAS; i++)
4740 if (old_opts.s_qf_names[i] && 4677 kfree(old_opts.s_qf_names[i]);
4741 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4742 kfree(old_opts.s_qf_names[i]);
4743 if (enable_quota) { 4678 if (enable_quota) {
4744 if (sb_any_quota_suspended(sb)) 4679 if (sb_any_quota_suspended(sb))
4745 dquot_resume(sb, -1); 4680 dquot_resume(sb, -1);
@@ -4768,9 +4703,7 @@ restore_opts:
4768#ifdef CONFIG_QUOTA 4703#ifdef CONFIG_QUOTA
4769 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4704 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4770 for (i = 0; i < MAXQUOTAS; i++) { 4705 for (i = 0; i < MAXQUOTAS; i++) {
4771 if (sbi->s_qf_names[i] && 4706 kfree(sbi->s_qf_names[i]);
4772 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4773 kfree(sbi->s_qf_names[i]);
4774 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4707 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4775 } 4708 }
4776#endif 4709#endif
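
The two hunks above change the remount path so that the old quota file names are deep-copied with kstrdup() before the new options are parsed; on success the copies are simply freed, and on failure they are put back verbatim, which removes the earlier pointer-comparison bookkeeping. A minimal user-space sketch of that snapshot-and-restore pattern (MAXQUOTAS and the field names are borrowed from the diff for readability, the rest is illustrative):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define MAXQUOTAS 2

struct sb_info {
	char *s_qf_names[MAXQUOTAS];
};

/* parse_ok stands in for "the new mount options were accepted" */
static int remount(struct sb_info *sbi, int parse_ok)
{
	char *old_names[MAXQUOTAS];
	int i, j;

	/* snapshot the current names, unwinding if an allocation fails */
	for (i = 0; i < MAXQUOTAS; i++) {
		old_names[i] = NULL;
		if (!sbi->s_qf_names[i])
			continue;
		old_names[i] = strdup(sbi->s_qf_names[i]);
		if (!old_names[i]) {
			for (j = 0; j < i; j++)
				free(old_names[j]);
			return -ENOMEM;
		}
	}

	/* ... option parsing may replace sbi->s_qf_names[] here ... */

	if (parse_ok) {
		/* new names stay; the snapshot is no longer needed */
		for (i = 0; i < MAXQUOTAS; i++)
			free(old_names[i]);
		return 0;
	}

	/* failure: drop whatever parsing installed and restore the snapshot */
	for (i = 0; i < MAXQUOTAS; i++) {
		free(sbi->s_qf_names[i]);
		sbi->s_qf_names[i] = old_names[i];
	}
	return -EINVAL;
}

int main(void)
{
	struct sb_info sbi = { { strdup("aquota.user"), NULL } };

	return remount(&sbi, 0) == -EINVAL ? 0 : 1;	/* simulate a failed remount */
}
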
@@ -4835,7 +4768,7 @@ static int ext4_write_dquot(struct dquot *dquot)
4835 struct inode *inode; 4768 struct inode *inode;
4836 4769
4837 inode = dquot_to_inode(dquot); 4770 inode = dquot_to_inode(dquot);
4838 handle = ext4_journal_start(inode, 4771 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4839 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 4772 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
4840 if (IS_ERR(handle)) 4773 if (IS_ERR(handle))
4841 return PTR_ERR(handle); 4774 return PTR_ERR(handle);
@@ -4851,7 +4784,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
4851 int ret, err; 4784 int ret, err;
4852 handle_t *handle; 4785 handle_t *handle;
4853 4786
4854 handle = ext4_journal_start(dquot_to_inode(dquot), 4787 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4855 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 4788 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
4856 if (IS_ERR(handle)) 4789 if (IS_ERR(handle))
4857 return PTR_ERR(handle); 4790 return PTR_ERR(handle);
@@ -4867,7 +4800,7 @@ static int ext4_release_dquot(struct dquot *dquot)
4867 int ret, err; 4800 int ret, err;
4868 handle_t *handle; 4801 handle_t *handle;
4869 4802
4870 handle = ext4_journal_start(dquot_to_inode(dquot), 4803 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4871 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 4804 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
4872 if (IS_ERR(handle)) { 4805 if (IS_ERR(handle)) {
4873 /* Release dquot anyway to avoid endless cycle in dqput() */ 4806 /* Release dquot anyway to avoid endless cycle in dqput() */
@@ -4899,7 +4832,7 @@ static int ext4_write_info(struct super_block *sb, int type)
4899 handle_t *handle; 4832 handle_t *handle;
4900 4833
4901 /* Data block + inode block */ 4834 /* Data block + inode block */
4902 handle = ext4_journal_start(sb->s_root->d_inode, 2); 4835 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
4903 if (IS_ERR(handle)) 4836 if (IS_ERR(handle))
4904 return PTR_ERR(handle); 4837 return PTR_ERR(handle);
4905 ret = dquot_commit_info(sb, type); 4838 ret = dquot_commit_info(sb, type);
@@ -5005,9 +4938,9 @@ static int ext4_enable_quotas(struct super_block *sb)
5005 DQUOT_USAGE_ENABLED); 4938 DQUOT_USAGE_ENABLED);
5006 if (err) { 4939 if (err) {
5007 ext4_warning(sb, 4940 ext4_warning(sb,
5008 "Failed to enable quota (type=%d) " 4941 "Failed to enable quota tracking "
5009 "tracking. Please run e2fsck to fix.", 4942 "(type=%d, err=%d). Please run "
5010 type); 4943 "e2fsck to fix.", type, err);
5011 return err; 4944 return err;
5012 } 4945 }
5013 } 4946 }
@@ -5045,7 +4978,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
5045 4978
5046 /* Update modification times of quota files when userspace can 4979 /* Update modification times of quota files when userspace can
5047 * start looking at them */ 4980 * start looking at them */
5048 handle = ext4_journal_start(inode, 1); 4981 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5049 if (IS_ERR(handle)) 4982 if (IS_ERR(handle))
5050 goto out; 4983 goto out;
5051 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4984 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a91ebc2b66f..3a120b277240 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -549,7 +549,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
549 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 549 error = ext4_handle_dirty_xattr_block(handle, inode, bh);
550 if (IS_SYNC(inode)) 550 if (IS_SYNC(inode))
551 ext4_handle_sync(handle); 551 ext4_handle_sync(handle);
552 dquot_free_block(inode, 1); 552 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
553 ea_bdebug(bh, "refcount now=%d; releasing", 553 ea_bdebug(bh, "refcount now=%d; releasing",
554 le32_to_cpu(BHDR(bh)->h_refcount)); 554 le32_to_cpu(BHDR(bh)->h_refcount));
555 } 555 }
@@ -832,7 +832,8 @@ inserted:
832 else { 832 else {
833 /* The old block is released after updating 833 /* The old block is released after updating
834 the inode. */ 834 the inode. */
835 error = dquot_alloc_block(inode, 1); 835 error = dquot_alloc_block(inode,
836 EXT4_C2B(EXT4_SB(sb), 1));
836 if (error) 837 if (error)
837 goto cleanup; 838 goto cleanup;
838 error = ext4_journal_get_write_access(handle, 839 error = ext4_journal_get_write_access(handle,
@@ -886,17 +887,18 @@ inserted:
886 (unsigned long long)block); 887 (unsigned long long)block);
887 888
888 new_bh = sb_getblk(sb, block); 889 new_bh = sb_getblk(sb, block);
889 if (!new_bh) { 890 if (unlikely(!new_bh)) {
891 error = -ENOMEM;
890getblk_failed: 892getblk_failed:
891 ext4_free_blocks(handle, inode, NULL, block, 1, 893 ext4_free_blocks(handle, inode, NULL, block, 1,
892 EXT4_FREE_BLOCKS_METADATA); 894 EXT4_FREE_BLOCKS_METADATA);
893 error = -EIO;
894 goto cleanup; 895 goto cleanup;
895 } 896 }
896 lock_buffer(new_bh); 897 lock_buffer(new_bh);
897 error = ext4_journal_get_create_access(handle, new_bh); 898 error = ext4_journal_get_create_access(handle, new_bh);
898 if (error) { 899 if (error) {
899 unlock_buffer(new_bh); 900 unlock_buffer(new_bh);
901 error = -EIO;
900 goto getblk_failed; 902 goto getblk_failed;
901 } 903 }
902 memcpy(new_bh->b_data, s->base, new_bh->b_size); 904 memcpy(new_bh->b_data, s->base, new_bh->b_size);
@@ -928,7 +930,7 @@ cleanup:
928 return error; 930 return error;
929 931
930cleanup_dquot: 932cleanup_dquot:
931 dquot_free_block(inode, 1); 933 dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
932 goto cleanup; 934 goto cleanup;
933 935
934bad_block: 936bad_block:
@@ -1164,17 +1166,10 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1164{ 1166{
1165 handle_t *handle; 1167 handle_t *handle;
1166 int error, retries = 0; 1168 int error, retries = 0;
1167 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 1169 int credits = ext4_jbd2_credits_xattr(inode);
1168 1170
1169retry: 1171retry:
1170 /* 1172 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1171 * In case of inline data, we may push out the data to a block,
1172 * So reserve the journal space first.
1173 */
1174 if (ext4_has_inline_data(inode))
1175 credits += ext4_writepage_trans_blocks(inode) + 1;
1176
1177 handle = ext4_journal_start(inode, credits);
1178 if (IS_ERR(handle)) { 1173 if (IS_ERR(handle)) {
1179 error = PTR_ERR(handle); 1174 error = PTR_ERR(handle);
1180 } else { 1175 } else {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 69eda787a96a..aa25deb5c6cd 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -125,74 +125,6 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
125 struct ext4_xattr_info *i, 125 struct ext4_xattr_info *i,
126 struct ext4_xattr_ibody_find *is); 126 struct ext4_xattr_ibody_find *is);
127 127
128extern int ext4_has_inline_data(struct inode *inode);
129extern int ext4_get_inline_size(struct inode *inode);
130extern int ext4_get_max_inline_size(struct inode *inode);
131extern int ext4_find_inline_data_nolock(struct inode *inode);
132extern void ext4_write_inline_data(struct inode *inode,
133 struct ext4_iloc *iloc,
134 void *buffer, loff_t pos,
135 unsigned int len);
136extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
137 unsigned int len);
138extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
139 unsigned int len);
140extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
141
142extern int ext4_readpage_inline(struct inode *inode, struct page *page);
143extern int ext4_try_to_write_inline_data(struct address_space *mapping,
144 struct inode *inode,
145 loff_t pos, unsigned len,
146 unsigned flags,
147 struct page **pagep);
148extern int ext4_write_inline_data_end(struct inode *inode,
149 loff_t pos, unsigned len,
150 unsigned copied,
151 struct page *page);
152extern struct buffer_head *
153ext4_journalled_write_inline_data(struct inode *inode,
154 unsigned len,
155 struct page *page);
156extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
157 struct inode *inode,
158 loff_t pos, unsigned len,
159 unsigned flags,
160 struct page **pagep,
161 void **fsdata);
162extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
163 unsigned len, unsigned copied,
164 struct page *page);
165extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
166 struct inode *inode);
167extern int ext4_try_create_inline_dir(handle_t *handle,
168 struct inode *parent,
169 struct inode *inode);
170extern int ext4_read_inline_dir(struct file *filp,
171 void *dirent, filldir_t filldir,
172 int *has_inline_data);
173extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
174 const struct qstr *d_name,
175 struct ext4_dir_entry_2 **res_dir,
176 int *has_inline_data);
177extern int ext4_delete_inline_entry(handle_t *handle,
178 struct inode *dir,
179 struct ext4_dir_entry_2 *de_del,
180 struct buffer_head *bh,
181 int *has_inline_data);
182extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
183extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
184 struct ext4_dir_entry_2 **parent_de,
185 int *retval);
186extern int ext4_inline_data_fiemap(struct inode *inode,
187 struct fiemap_extent_info *fieinfo,
188 int *has_inline);
189extern int ext4_try_to_evict_inline_data(handle_t *handle,
190 struct inode *inode,
191 int needed);
192extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
193
194extern int ext4_convert_inline_data(struct inode *inode);
195
196#ifdef CONFIG_EXT4_FS_SECURITY 128#ifdef CONFIG_EXT4_FS_SECURITY
197extern int ext4_init_security(handle_t *handle, struct inode *inode, 129extern int ext4_init_security(handle_t *handle, struct inode *inode,
198 struct inode *dir, const struct qstr *qstr); 130 struct inode *dir, const struct qstr *qstr);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e95b94945d5f..137af4255da6 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -191,15 +191,14 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
191 retval = f2fs_getxattr(inode, name_index, "", value, retval); 191 retval = f2fs_getxattr(inode, name_index, "", value, retval);
192 } 192 }
193 193
194 if (retval < 0) { 194 if (retval > 0)
195 if (retval == -ENODATA)
196 acl = NULL;
197 else
198 acl = ERR_PTR(retval);
199 } else {
200 acl = f2fs_acl_from_disk(value, retval); 195 acl = f2fs_acl_from_disk(value, retval);
201 } 196 else if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
202 kfree(value); 200 kfree(value);
201
203 if (!IS_ERR(acl)) 202 if (!IS_ERR(acl))
204 set_cached_acl(inode, type, acl); 203 set_cached_acl(inode, type, acl);
205 204
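
The f2fs_get_acl() change above flattens the nested error handling into a three-way triage on the value returned by f2fs_getxattr(): a positive length is parsed into an ACL, -ENODATA simply means no ACL is stored, and any other negative value is propagated as an error. A compact user-space analogue of that triage (the parse step and all names are invented for the sketch):

#include <errno.h>
#include <stdio.h>

/* stand-in parser: any positive size yields a "valid" ACL */
static const char *acl_from_disk(long size)
{
	return size > 0 ? "acl" : NULL;
}

static const char *load_acl(long retval, int *err)
{
	*err = 0;
	if (retval > 0)
		return acl_from_disk(retval);	/* stored ACL: parse it */
	if (retval == -ENODATA)
		return NULL;			/* nothing stored: not an error */
	*err = (int)retval;			/* genuine failure */
	return NULL;
}

int main(void)
{
	int err;

	printf("%s\n", load_acl(16, &err) ? "parsed" : "none");
	load_acl(-ENODATA, &err);
	printf("no acl, err=%d\n", err);
	load_acl(-EIO, &err);
	printf("hard error, err=%d\n", err);
	return 0;
}
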
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c37e2be..2b6fc131e2ce 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,22 +72,22 @@ static int f2fs_write_meta_page(struct page *page,
72{ 72{
73 struct inode *inode = page->mapping->host; 73 struct inode *inode = page->mapping->host;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 int err;
76 75
77 wait_on_page_writeback(page); 76 /* Should not write any meta pages, if any IO error was occurred */
78 77 if (wbc->for_reclaim ||
79 err = write_meta_page(sbi, page, wbc); 78 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
80 if (err) { 79 dec_page_count(sbi, F2FS_DIRTY_META);
81 wbc->pages_skipped++; 80 wbc->pages_skipped++;
82 set_page_dirty(page); 81 set_page_dirty(page);
82 return AOP_WRITEPAGE_ACTIVATE;
83 } 83 }
84 84
85 dec_page_count(sbi, F2FS_DIRTY_META); 85 wait_on_page_writeback(page);
86 86
87 /* In this case, we should not unlock this page */ 87 write_meta_page(sbi, page);
88 if (err != AOP_WRITEPAGE_ACTIVATE) 88 dec_page_count(sbi, F2FS_DIRTY_META);
89 unlock_page(page); 89 unlock_page(page);
90 return err; 90 return 0;
91} 91}
92 92
93static int f2fs_write_meta_pages(struct address_space *mapping, 93static int f2fs_write_meta_pages(struct address_space *mapping,
@@ -138,7 +138,10 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
138 BUG_ON(page->mapping != mapping); 138 BUG_ON(page->mapping != mapping);
139 BUG_ON(!PageDirty(page)); 139 BUG_ON(!PageDirty(page));
140 clear_page_dirty_for_io(page); 140 clear_page_dirty_for_io(page);
141 f2fs_write_meta_page(page, &wbc); 141 if (f2fs_write_meta_page(page, &wbc)) {
142 unlock_page(page);
143 break;
144 }
142 if (nwritten++ >= nr_to_write) 145 if (nwritten++ >= nr_to_write)
143 break; 146 break;
144 } 147 }
@@ -161,7 +164,6 @@ static int f2fs_set_meta_page_dirty(struct page *page)
161 if (!PageDirty(page)) { 164 if (!PageDirty(page)) {
162 __set_page_dirty_nobuffers(page); 165 __set_page_dirty_nobuffers(page);
163 inc_page_count(sbi, F2FS_DIRTY_META); 166 inc_page_count(sbi, F2FS_DIRTY_META);
164 F2FS_SET_SB_DIRT(sbi);
165 return 1; 167 return 1;
166 } 168 }
167 return 0; 169 return 0;
@@ -214,22 +216,13 @@ retry:
214 goto retry; 216 goto retry;
215 } 217 }
216 new->ino = ino; 218 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218 219
219 /* add new_oentry into list which is sorted by inode number */ 220 /* add new_oentry into list which is sorted by inode number */
220 if (orphan) { 221 if (orphan)
221 struct orphan_inode_entry *prev; 222 list_add(&new->list, this->prev);
222 223 else
223 /* get previous entry */
224 prev = list_entry(orphan->list.prev, typeof(*prev), list);
225 if (&prev->list != head)
226 /* insert new orphan inode entry */
227 list_add(&new->list, &prev->list);
228 else
229 list_add(&new->list, head);
230 } else {
231 list_add_tail(&new->list, head); 224 list_add_tail(&new->list, head);
232 } 225
233 sbi->n_orphans++; 226 sbi->n_orphans++;
234out: 227out:
235 mutex_unlock(&sbi->orphan_inode_mutex); 228 mutex_unlock(&sbi->orphan_inode_mutex);
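
The hunk above simplifies the orphan-inode registration: the list is still kept sorted by inode number, but the new entry is now linked directly in front of the entry found by the lookup loop (list_add(&new->list, this->prev)), or appended at the tail when no such entry exists, instead of re-deriving the predecessor by hand. A self-contained sketch of that sorted insertion using a plain singly linked list (struct and function names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct orphan {
	unsigned long ino;
	struct orphan *next;
};

/* insert ino so the list stays sorted in ascending order */
static void orphan_add(struct orphan **head, unsigned long ino)
{
	struct orphan **pos = head;
	struct orphan *new = malloc(sizeof(*new));

	if (!new)
		return;
	new->ino = ino;

	/* stop at the first entry that is not smaller than the new one */
	while (*pos && (*pos)->ino < ino)
		pos = &(*pos)->next;
	new->next = *pos;
	*pos = new;
}

int main(void)
{
	struct orphan *head = NULL, *p;

	orphan_add(&head, 7);
	orphan_add(&head, 3);
	orphan_add(&head, 5);
	for (p = head; p; p = p->next)
		printf("%lu ", p->ino);	/* prints: 3 5 7 */
	printf("\n");
	return 0;
}
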
@@ -546,7 +539,7 @@ retry:
546/* 539/*
547 * Freeze all the FS-operations for checkpoint. 540 * Freeze all the FS-operations for checkpoint.
548 */ 541 */
549void block_operations(struct f2fs_sb_info *sbi) 542static void block_operations(struct f2fs_sb_info *sbi)
550{ 543{
551 int t; 544 int t;
552 struct writeback_control wbc = { 545 struct writeback_control wbc = {
@@ -718,27 +711,24 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
718 sbi->alloc_valid_block_count = 0; 711 sbi->alloc_valid_block_count = 0;
719 712
720 /* Here, we only have one bio having CP pack */ 713 /* Here, we only have one bio having CP pack */
721 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) 714 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
722 sbi->sb->s_flags |= MS_RDONLY;
723 else
724 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
725 715
726 clear_prefree_segments(sbi); 716 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
727 F2FS_RESET_SB_DIRT(sbi); 717 clear_prefree_segments(sbi);
718 F2FS_RESET_SB_DIRT(sbi);
719 }
728} 720}
729 721
730/* 722/*
731 * We guarantee that this checkpoint procedure should not fail. 723 * We guarantee that this checkpoint procedure should not fail.
732 */ 724 */
733void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount) 725void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
734{ 726{
735 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 727 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
736 unsigned long long ckpt_ver; 728 unsigned long long ckpt_ver;
737 729
738 if (!blocked) { 730 mutex_lock(&sbi->cp_mutex);
739 mutex_lock(&sbi->cp_mutex); 731 block_operations(sbi);
740 block_operations(sbi);
741 }
742 732
743 f2fs_submit_bio(sbi, DATA, true); 733 f2fs_submit_bio(sbi, DATA, true);
744 f2fs_submit_bio(sbi, NODE, true); 734 f2fs_submit_bio(sbi, NODE, true);
@@ -772,7 +762,7 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
772 sbi->n_orphans = 0; 762 sbi->n_orphans = 0;
773} 763}
774 764
775int create_checkpoint_caches(void) 765int __init create_checkpoint_caches(void)
776{ 766{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 767 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL); 768 sizeof(struct orphan_inode_entry), NULL);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 655aeabc1dd4..7bd22a201125 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -16,6 +16,7 @@
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/bio.h> 18#include <linux/bio.h>
19#include <linux/prefetch.h>
19 20
20#include "f2fs.h" 21#include "f2fs.h"
21#include "node.h" 22#include "node.h"
@@ -546,6 +547,15 @@ redirty_out:
546 547
547#define MAX_DESIRED_PAGES_WP 4096 548#define MAX_DESIRED_PAGES_WP 4096
548 549
550static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
551 void *data)
552{
553 struct address_space *mapping = data;
554 int ret = mapping->a_ops->writepage(page, wbc);
555 mapping_set_error(mapping, ret);
556 return ret;
557}
558
549static int f2fs_write_data_pages(struct address_space *mapping, 559static int f2fs_write_data_pages(struct address_space *mapping,
550 struct writeback_control *wbc) 560 struct writeback_control *wbc)
551{ 561{
@@ -562,7 +572,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
562 572
563 if (!S_ISDIR(inode->i_mode)) 573 if (!S_ISDIR(inode->i_mode))
564 mutex_lock(&sbi->writepages); 574 mutex_lock(&sbi->writepages);
565 ret = generic_writepages(mapping, wbc); 575 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
566 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
567 mutex_unlock(&sbi->writepages); 577 mutex_unlock(&sbi->writepages);
568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 578 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
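
The two hunks above replace generic_writepages() with write_cache_pages() plus a private __f2fs_writepage() callback, whose only extra job is to latch each page's write result on the mapping through mapping_set_error(). A user-space sketch of that "wrap the worker and remember the first error" pattern (every name below is illustrative, not the kernel API):

#include <errno.h>
#include <stdio.h>

struct mapping {
	int error;		/* first error seen, 0 if none */
};

static void mapping_set_error(struct mapping *m, int err)
{
	if (err && !m->error)
		m->error = err;
}

typedef int (*writepage_t)(int page, void *data);

static int write_pages(const int *pages, int npages,
		       writepage_t writepage, void *data)
{
	int i, ret = 0;

	for (i = 0; i < npages; i++) {
		int err = writepage(pages[i], data);

		if (err && !ret)
			ret = err;	/* keep writing, remember first error */
	}
	return ret;
}

static int do_writepage(int page, void *data)
{
	struct mapping *m = data;
	int err = (page == 2) ? -EIO : 0;	/* pretend page 2 fails */

	mapping_set_error(m, err);
	return err;
}

int main(void)
{
	int pages[] = { 1, 2, 3 };
	struct mapping m = { 0 };

	write_pages(pages, 3, do_writepage, &m);
	printf("mapping error: %d\n", m.error);
	return 0;
}
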
@@ -688,6 +698,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
688 return 0; 698 return 0;
689} 699}
690 700
701static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
702{
703 return generic_block_bmap(mapping, block, get_data_block_ro);
704}
705
691const struct address_space_operations f2fs_dblock_aops = { 706const struct address_space_operations f2fs_dblock_aops = {
692 .readpage = f2fs_read_data_page, 707 .readpage = f2fs_read_data_page,
693 .readpages = f2fs_read_data_pages, 708 .readpages = f2fs_read_data_pages,
@@ -699,4 +714,5 @@ const struct address_space_operations f2fs_dblock_aops = {
699 .invalidatepage = f2fs_invalidate_data_page, 714 .invalidatepage = f2fs_invalidate_data_page,
700 .releasepage = f2fs_release_data_page, 715 .releasepage = f2fs_release_data_page,
701 .direct_IO = f2fs_direct_IO, 716 .direct_IO = f2fs_direct_IO,
717 .bmap = f2fs_bmap,
702}; 718};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0e0380a588ad..025b9e2f935d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -26,6 +26,7 @@
26 26
27static LIST_HEAD(f2fs_stat_list); 27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root; 28static struct dentry *debugfs_root;
29static DEFINE_MUTEX(f2fs_stat_mutex);
29 30
30static void update_general_status(struct f2fs_sb_info *sbi) 31static void update_general_status(struct f2fs_sb_info *sbi)
31{ 32{
@@ -180,18 +181,16 @@ static int stat_show(struct seq_file *s, void *v)
180 int i = 0; 181 int i = 0;
181 int j; 182 int j;
182 183
184 mutex_lock(&f2fs_stat_mutex);
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { 185 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
186 char devname[BDEVNAME_SIZE];
184 187
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi); 188 update_general_status(si->sbi);
191 189
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++); 190 seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ", 191 bdevname(si->sbi->sb->s_bdev, devname), i++);
194 si->nat_area_segs, si->sit_area_segs); 192 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
193 si->sit_area_segs, si->nat_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d", 194 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs); 195 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", 196 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
@@ -286,8 +285,8 @@ static int stat_show(struct seq_file *s, void *v)
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", 285 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10, 286 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10); 287 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 } 288 }
289 mutex_unlock(&f2fs_stat_mutex);
291 return 0; 290 return 0;
292} 291}
293 292
@@ -303,7 +302,7 @@ static const struct file_operations stat_fops = {
303 .release = single_release, 302 .release = single_release,
304}; 303};
305 304
306static int init_stats(struct f2fs_sb_info *sbi) 305int f2fs_build_stats(struct f2fs_sb_info *sbi)
307{ 306{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 307 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si; 308 struct f2fs_stat_info *si;
@@ -313,9 +312,6 @@ static int init_stats(struct f2fs_sb_info *sbi)
313 return -ENOMEM; 312 return -ENOMEM;
314 313
315 si = sbi->stat_info; 314 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count); 315 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); 316 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); 317 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -325,21 +321,11 @@ static int init_stats(struct f2fs_sb_info *sbi)
325 si->main_area_zones = si->main_area_sections / 321 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone); 322 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi; 323 si->sbi = sbi;
328 return 0;
329}
330 324
331int f2fs_build_stats(struct f2fs_sb_info *sbi) 325 mutex_lock(&f2fs_stat_mutex);
332{ 326 list_add_tail(&si->stat_list, &f2fs_stat_list);
333 int retval; 327 mutex_unlock(&f2fs_stat_mutex);
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341 328
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0; 329 return 0;
344} 330}
345 331
@@ -347,14 +333,22 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{ 333{
348 struct f2fs_stat_info *si = sbi->stat_info; 334 struct f2fs_stat_info *si = sbi->stat_info;
349 335
336 mutex_lock(&f2fs_stat_mutex);
350 list_del(&si->stat_list); 337 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock); 338 mutex_unlock(&f2fs_stat_mutex);
352 si->sbi = NULL; 339
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info); 340 kfree(sbi->stat_info);
355} 341}
356 342
357void destroy_root_stats(void) 343void __init f2fs_create_root_stats(void)
344{
345 debugfs_root = debugfs_create_dir("f2fs", NULL);
346 if (debugfs_root)
347 debugfs_create_file("status", S_IRUGO, debugfs_root,
348 NULL, &stat_fops);
349}
350
351void f2fs_destroy_root_stats(void)
358{ 352{
359 debugfs_remove_recursive(debugfs_root); 353 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL; 354 debugfs_root = NULL;
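
The debug.c changes above drop the per-entry si->stat_lock and instead serialize every addition, removal and traversal of the shared f2fs_stat_list behind one global f2fs_stat_mutex, so stat_show() can walk the list without entries disappearing underneath it. A pthread sketch of the same single-lock-around-a-shared-list pattern (list layout and names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct stat_info {
	int value;
	struct stat_info *next;
};

static struct stat_info *stat_list;
static pthread_mutex_t stat_mutex = PTHREAD_MUTEX_INITIALIZER;

static void stat_add(struct stat_info *si)
{
	pthread_mutex_lock(&stat_mutex);
	si->next = stat_list;
	stat_list = si;
	pthread_mutex_unlock(&stat_mutex);
}

static void stat_del(struct stat_info *si)
{
	struct stat_info **pos;

	pthread_mutex_lock(&stat_mutex);
	for (pos = &stat_list; *pos; pos = &(*pos)->next)
		if (*pos == si) {
			*pos = si->next;
			break;
		}
	pthread_mutex_unlock(&stat_mutex);
}

static long stat_sum(void)
{
	struct stat_info *si;
	long sum = 0;

	pthread_mutex_lock(&stat_mutex);
	for (si = stat_list; si; si = si->next)
		sum += si->value;	/* entries cannot vanish mid-walk */
	pthread_mutex_unlock(&stat_mutex);
	return sum;
}

int main(void)
{
	struct stat_info a = { 1, NULL }, b = { 2, NULL };

	stat_add(&a);
	stat_add(&b);
	printf("sum=%ld\n", stat_sum());
	stat_del(&a);
	printf("sum=%ld\n", stat_sum());
	return 0;
}
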
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b4e24f32b54e..a1f38443ecee 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -11,6 +11,7 @@
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include "f2fs.h" 13#include "f2fs.h"
14#include "node.h"
14#include "acl.h" 15#include "acl.h"
15 16
16static unsigned long dir_blocks(struct inode *inode) 17static unsigned long dir_blocks(struct inode *inode)
@@ -74,7 +75,7 @@ static unsigned long dir_block_index(unsigned int level, unsigned int idx)
74 return bidx; 75 return bidx;
75} 76}
76 77
77static bool early_match_name(const char *name, int namelen, 78static bool early_match_name(const char *name, size_t namelen,
78 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 79 f2fs_hash_t namehash, struct f2fs_dir_entry *de)
79{ 80{
80 if (le16_to_cpu(de->name_len) != namelen) 81 if (le16_to_cpu(de->name_len) != namelen)
@@ -87,7 +88,7 @@ static bool early_match_name(const char *name, int namelen,
87} 88}
88 89
89static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 90static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
90 const char *name, int namelen, int *max_slots, 91 const char *name, size_t namelen, int *max_slots,
91 f2fs_hash_t namehash, struct page **res_page) 92 f2fs_hash_t namehash, struct page **res_page)
92{ 93{
93 struct f2fs_dir_entry *de; 94 struct f2fs_dir_entry *de;
@@ -126,7 +127,7 @@ found:
126} 127}
127 128
128static struct f2fs_dir_entry *find_in_level(struct inode *dir, 129static struct f2fs_dir_entry *find_in_level(struct inode *dir,
129 unsigned int level, const char *name, int namelen, 130 unsigned int level, const char *name, size_t namelen,
130 f2fs_hash_t namehash, struct page **res_page) 131 f2fs_hash_t namehash, struct page **res_page)
131{ 132{
132 int s = GET_DENTRY_SLOTS(namelen); 133 int s = GET_DENTRY_SLOTS(namelen);
@@ -181,7 +182,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
181 struct qstr *child, struct page **res_page) 182 struct qstr *child, struct page **res_page)
182{ 183{
183 const char *name = child->name; 184 const char *name = child->name;
184 int namelen = child->len; 185 size_t namelen = child->len;
185 unsigned long npages = dir_blocks(dir); 186 unsigned long npages = dir_blocks(dir);
186 struct f2fs_dir_entry *de = NULL; 187 struct f2fs_dir_entry *de = NULL;
187 f2fs_hash_t name_hash; 188 f2fs_hash_t name_hash;
@@ -264,7 +265,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
264 mutex_unlock_op(sbi, DENTRY_OPS); 265 mutex_unlock_op(sbi, DENTRY_OPS);
265} 266}
266 267
267void init_dent_inode(struct dentry *dentry, struct page *ipage) 268void init_dent_inode(const struct qstr *name, struct page *ipage)
268{ 269{
269 struct f2fs_node *rn; 270 struct f2fs_node *rn;
270 271
@@ -273,20 +274,19 @@ void init_dent_inode(struct dentry *dentry, struct page *ipage)
273 274
274 wait_on_page_writeback(ipage); 275 wait_on_page_writeback(ipage);
275 276
276 /* copy dentry info. to this inode page */ 277 /* copy name info. to this inode page */
277 rn = (struct f2fs_node *)page_address(ipage); 278 rn = (struct f2fs_node *)page_address(ipage);
278 rn->i.i_namelen = cpu_to_le32(dentry->d_name.len); 279 rn->i.i_namelen = cpu_to_le32(name->len);
279 memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len); 280 memcpy(rn->i.i_name, name->name, name->len);
280 set_page_dirty(ipage); 281 set_page_dirty(ipage);
281} 282}
282 283
283static int init_inode_metadata(struct inode *inode, struct dentry *dentry) 284static int init_inode_metadata(struct inode *inode,
285 struct inode *dir, const struct qstr *name)
284{ 286{
285 struct inode *dir = dentry->d_parent->d_inode;
286
287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
288 int err; 288 int err;
289 err = new_inode_page(inode, dentry); 289 err = new_inode_page(inode, name);
290 if (err) 290 if (err)
291 return err; 291 return err;
292 292
@@ -308,7 +308,8 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
309 if (IS_ERR(ipage)) 309 if (IS_ERR(ipage))
310 return PTR_ERR(ipage); 310 return PTR_ERR(ipage);
311 init_dent_inode(dentry, ipage); 311 set_cold_node(inode, ipage);
312 init_dent_inode(name, ipage);
312 f2fs_put_page(ipage, 1); 313 f2fs_put_page(ipage, 1);
313 } 314 }
314 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 315 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
@@ -369,7 +370,7 @@ next:
369 goto next; 370 goto next;
370} 371}
371 372
372int f2fs_add_link(struct dentry *dentry, struct inode *inode) 373int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
373{ 374{
374 unsigned int bit_pos; 375 unsigned int bit_pos;
375 unsigned int level; 376 unsigned int level;
@@ -378,17 +379,15 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
378 f2fs_hash_t dentry_hash; 379 f2fs_hash_t dentry_hash;
379 struct f2fs_dir_entry *de; 380 struct f2fs_dir_entry *de;
380 unsigned int nbucket, nblock; 381 unsigned int nbucket, nblock;
381 struct inode *dir = dentry->d_parent->d_inode;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 const char *name = dentry->d_name.name; 383 size_t namelen = name->len;
384 int namelen = dentry->d_name.len;
385 struct page *dentry_page = NULL; 384 struct page *dentry_page = NULL;
386 struct f2fs_dentry_block *dentry_blk = NULL; 385 struct f2fs_dentry_block *dentry_blk = NULL;
387 int slots = GET_DENTRY_SLOTS(namelen); 386 int slots = GET_DENTRY_SLOTS(namelen);
388 int err = 0; 387 int err = 0;
389 int i; 388 int i;
390 389
391 dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len); 390 dentry_hash = f2fs_dentry_hash(name->name, name->len);
392 level = 0; 391 level = 0;
393 current_depth = F2FS_I(dir)->i_current_depth; 392 current_depth = F2FS_I(dir)->i_current_depth;
394 if (F2FS_I(dir)->chash == dentry_hash) { 393 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -431,7 +430,7 @@ start:
431 ++level; 430 ++level;
432 goto start; 431 goto start;
433add_dentry: 432add_dentry:
434 err = init_inode_metadata(inode, dentry); 433 err = init_inode_metadata(inode, dir, name);
435 if (err) 434 if (err)
436 goto fail; 435 goto fail;
437 436
@@ -440,7 +439,7 @@ add_dentry:
440 de = &dentry_blk->dentry[bit_pos]; 439 de = &dentry_blk->dentry[bit_pos];
441 de->hash_code = dentry_hash; 440 de->hash_code = dentry_hash;
442 de->name_len = cpu_to_le16(namelen); 441 de->name_len = cpu_to_le16(namelen);
443 memcpy(dentry_blk->filename[bit_pos], name, namelen); 442 memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
444 de->ino = cpu_to_le32(inode->i_ino); 443 de->ino = cpu_to_le32(inode->i_ino);
445 set_de_type(de, inode); 444 set_de_type(de, inode);
446 for (i = 0; i < slots; i++) 445 for (i = 0; i < slots; i++)
@@ -501,7 +500,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
501 } 500 }
502 501
503 if (inode) { 502 if (inode) {
504 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 503 inode->i_ctime = CURRENT_TIME;
505 drop_nlink(inode); 504 drop_nlink(inode);
506 if (S_ISDIR(inode->i_mode)) { 505 if (S_ISDIR(inode->i_mode)) {
507 drop_nlink(inode); 506 drop_nlink(inode);
@@ -540,13 +539,13 @@ int f2fs_make_empty(struct inode *inode, struct inode *parent)
540 539
541 de = &dentry_blk->dentry[0]; 540 de = &dentry_blk->dentry[0];
542 de->name_len = cpu_to_le16(1); 541 de->name_len = cpu_to_le16(1);
543 de->hash_code = 0; 542 de->hash_code = f2fs_dentry_hash(".", 1);
544 de->ino = cpu_to_le32(inode->i_ino); 543 de->ino = cpu_to_le32(inode->i_ino);
545 memcpy(dentry_blk->filename[0], ".", 1); 544 memcpy(dentry_blk->filename[0], ".", 1);
546 set_de_type(de, inode); 545 set_de_type(de, inode);
547 546
548 de = &dentry_blk->dentry[1]; 547 de = &dentry_blk->dentry[1];
549 de->hash_code = 0; 548 de->hash_code = f2fs_dentry_hash("..", 2);
550 de->name_len = cpu_to_le16(2); 549 de->name_len = cpu_to_le16(2);
551 de->ino = cpu_to_le32(parent->i_ino); 550 de->ino = cpu_to_le32(parent->i_ino);
552 memcpy(dentry_blk->filename[1], "..", 2); 551 memcpy(dentry_blk->filename[1], "..", 2);
@@ -601,7 +600,7 @@ bool f2fs_empty_dir(struct inode *dir)
601static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) 600static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
602{ 601{
603 unsigned long pos = file->f_pos; 602 unsigned long pos = file->f_pos;
604 struct inode *inode = file->f_dentry->d_inode; 603 struct inode *inode = file_inode(file);
605 unsigned long npages = dir_blocks(inode); 604 unsigned long npages = dir_blocks(inode);
606 unsigned char *types = NULL; 605 unsigned char *types = NULL;
607 unsigned int bit_pos = 0, start_bit_pos = 0; 606 unsigned int bit_pos = 0, start_bit_pos = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a18d63db2fb6..cc2213afdcc7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -104,6 +104,20 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
104} 104}
105 105
106/* 106/*
107 * ioctl commands
108 */
109#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
110#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
111
112#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
113/*
114 * ioctl commands in 32 bit emulation
115 */
116#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
117#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
118#endif
119
120/*
107 * For INODE and NODE manager 121 * For INODE and NODE manager
108 */ 122 */
109#define XATTR_NODE_OFFSET (-1) /* 123#define XATTR_NODE_OFFSET (-1) /*
@@ -141,7 +155,7 @@ struct f2fs_inode_info {
141 155
142 /* Use below internally in f2fs*/ 156 /* Use below internally in f2fs*/
143 unsigned long flags; /* use to pass per-file flags */ 157 unsigned long flags; /* use to pass per-file flags */
144 unsigned long long data_version;/* lastes version of data for fsync */ 158 unsigned long long data_version;/* latest version of data for fsync */
145 atomic_t dirty_dents; /* # of dirty dentry pages */ 159 atomic_t dirty_dents; /* # of dirty dentry pages */
146 f2fs_hash_t chash; /* hash value of given file name */ 160 f2fs_hash_t chash; /* hash value of given file name */
147 unsigned int clevel; /* maximum level of given file name */ 161 unsigned int clevel; /* maximum level of given file name */
@@ -211,11 +225,11 @@ struct dnode_of_data {
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, 225static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid) 226 struct page *ipage, struct page *npage, nid_t nid)
213{ 227{
228 memset(dn, 0, sizeof(*dn));
214 dn->inode = inode; 229 dn->inode = inode;
215 dn->inode_page = ipage; 230 dn->inode_page = ipage;
216 dn->node_page = npage; 231 dn->node_page = npage;
217 dn->nid = nid; 232 dn->nid = nid;
218 dn->inode_page_locked = 0;
219} 233}
220 234
221/* 235/*
@@ -573,6 +587,14 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
573 return atomic_read(&sbi->nr_pages[count_type]); 587 return atomic_read(&sbi->nr_pages[count_type]);
574} 588}
575 589
590static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
591{
592 unsigned int pages_per_sec = sbi->segs_per_sec *
593 (1 << sbi->log_blocks_per_seg);
594 return ((get_pages(sbi, block_type) + pages_per_sec - 1)
595 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
596}
597
576static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 598static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
577{ 599{
578 block_t ret; 600 block_t ret;
@@ -842,12 +864,12 @@ void f2fs_truncate(struct inode *);
842int f2fs_setattr(struct dentry *, struct iattr *); 864int f2fs_setattr(struct dentry *, struct iattr *);
843int truncate_hole(struct inode *, pgoff_t, pgoff_t); 865int truncate_hole(struct inode *, pgoff_t, pgoff_t);
844long f2fs_ioctl(struct file *, unsigned int, unsigned long); 866long f2fs_ioctl(struct file *, unsigned int, unsigned long);
867long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
845 868
846/* 869/*
847 * inode.c 870 * inode.c
848 */ 871 */
849void f2fs_set_inode_flags(struct inode *); 872void f2fs_set_inode_flags(struct inode *);
850struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
851struct inode *f2fs_iget(struct super_block *, unsigned long); 873struct inode *f2fs_iget(struct super_block *, unsigned long);
852void update_inode(struct inode *, struct page *); 874void update_inode(struct inode *, struct page *);
853int f2fs_write_inode(struct inode *, struct writeback_control *); 875int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -867,21 +889,29 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
867ino_t f2fs_inode_by_name(struct inode *, struct qstr *); 889ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
868void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 890void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
869 struct page *, struct inode *); 891 struct page *, struct inode *);
870void init_dent_inode(struct dentry *, struct page *); 892void init_dent_inode(const struct qstr *, struct page *);
871int f2fs_add_link(struct dentry *, struct inode *); 893int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
872void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 894void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
873int f2fs_make_empty(struct inode *, struct inode *); 895int f2fs_make_empty(struct inode *, struct inode *);
874bool f2fs_empty_dir(struct inode *); 896bool f2fs_empty_dir(struct inode *);
875 897
898static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
899{
900 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
901 inode);
902}
903
876/* 904/*
877 * super.c 905 * super.c
878 */ 906 */
879int f2fs_sync_fs(struct super_block *, int); 907int f2fs_sync_fs(struct super_block *, int);
908extern __printf(3, 4)
909void f2fs_msg(struct super_block *, const char *, const char *, ...);
880 910
881/* 911/*
882 * hash.c 912 * hash.c
883 */ 913 */
884f2fs_hash_t f2fs_dentry_hash(const char *, int); 914f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
885 915
886/* 916/*
887 * node.c 917 * node.c
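The f2fs_msg() declaration above carries a __printf(3, 4) annotation, which the kernel defines as GCC's format attribute, so every call site gets its format string checked against its arguments at compile time. A small stand-in to show the effect (demo_msg and its output format are illustrative, not the f2fs implementation):

    #include <stdarg.h>
    #include <stdio.h>

    /* the kernel's __printf() macro expands to this attribute */
    #define __printf(a, b) __attribute__((format(printf, a, b)))

    struct super_block;     /* opaque here; only the prototype shape matters */

    static __printf(3, 4)
    void demo_msg(struct super_block *sb, const char *level, const char *fmt, ...)
    {
        va_list args;

        (void)sb;
        va_start(args, fmt);
        fprintf(stderr, "F2FS-fs (%s): ", level);
        vfprintf(stderr, fmt, args);
        fputc('\n', stderr);
        va_end(args);
    }

    int main(void)
    {
        demo_msg(NULL, "KERN_ERR", "invalid blocksize %u", 4096u);
        /*
         * demo_msg(NULL, "KERN_ERR", "invalid blocksize %u", "oops");
         * would now trigger a -Wformat warning thanks to __printf(3, 4).
         */
        return 0;
    }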
@@ -894,7 +924,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
894int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 924int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
895int truncate_inode_blocks(struct inode *, pgoff_t); 925int truncate_inode_blocks(struct inode *, pgoff_t);
896int remove_inode_page(struct inode *); 926int remove_inode_page(struct inode *);
897int new_inode_page(struct inode *, struct dentry *); 927int new_inode_page(struct inode *, const struct qstr *);
898struct page *new_node_page(struct dnode_of_data *, unsigned int); 928struct page *new_node_page(struct dnode_of_data *, unsigned int);
899void ra_node_page(struct f2fs_sb_info *, nid_t); 929void ra_node_page(struct f2fs_sb_info *, nid_t);
900struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 930struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -912,7 +942,7 @@ int restore_node_summary(struct f2fs_sb_info *, unsigned int,
912void flush_nat_entries(struct f2fs_sb_info *); 942void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *); 943int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *); 944void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void); 945int __init create_node_manager_caches(void);
916void destroy_node_manager_caches(void); 946void destroy_node_manager_caches(void);
917 947
918/* 948/*
@@ -927,8 +957,7 @@ void allocate_new_segments(struct f2fs_sb_info *);
927struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 957struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
928struct bio *f2fs_bio_alloc(struct block_device *, int); 958struct bio *f2fs_bio_alloc(struct block_device *, int);
929void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); 959void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
930int write_meta_page(struct f2fs_sb_info *, struct page *, 960void write_meta_page(struct f2fs_sb_info *, struct page *);
931 struct writeback_control *);
932void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, 961void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
933 block_t, block_t *); 962 block_t, block_t *);
934void write_data_page(struct inode *, struct page *, struct dnode_of_data*, 963void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
@@ -961,10 +990,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *);
961void set_dirty_dir_page(struct inode *, struct page *); 990void set_dirty_dir_page(struct inode *, struct page *);
962void remove_dirty_dir_inode(struct inode *); 991void remove_dirty_dir_inode(struct inode *);
963void sync_dirty_dir_inodes(struct f2fs_sb_info *); 992void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *); 993void write_checkpoint(struct f2fs_sb_info *, bool);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *); 994void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void); 995int __init create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void); 996void destroy_checkpoint_caches(void);
969 997
970/* 998/*
@@ -984,9 +1012,9 @@ int do_write_data_page(struct page *);
984int start_gc_thread(struct f2fs_sb_info *); 1012int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *); 1013void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int); 1014block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int); 1015int f2fs_gc(struct f2fs_sb_info *);
988void build_gc_manager(struct f2fs_sb_info *); 1016void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void); 1017int __init create_gc_caches(void);
990void destroy_gc_caches(void); 1018void destroy_gc_caches(void);
991 1019
992/* 1020/*
@@ -1058,7 +1086,8 @@ struct f2fs_stat_info {
1058 1086
1059int f2fs_build_stats(struct f2fs_sb_info *); 1087int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *); 1088void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void); 1089void __init f2fs_create_root_stats(void);
1090void f2fs_destroy_root_stats(void);
1062#else 1091#else
1063#define stat_inc_call_count(si) 1092#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(si, type) 1093#define stat_inc_seg_count(si, type)
@@ -1068,7 +1097,8 @@ void destroy_root_stats(void);
1068 1097
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1098static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1099static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { } 1100static inline void __init f2fs_create_root_stats(void) { }
1101static inline void f2fs_destroy_root_stats(void) { }
1072#endif 1102#endif
1073 1103
1074extern const struct file_operations f2fs_dir_operations; 1104extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f9e085dfb1f0..b7a053d4c6d3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -15,6 +15,7 @@
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/falloc.h> 16#include <linux/falloc.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/compat.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20 21
@@ -96,8 +97,9 @@ out:
96} 97}
97 98
98static const struct vm_operations_struct f2fs_file_vm_ops = { 99static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault, 100 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite, 101 .page_mkwrite = f2fs_vm_page_mkwrite,
102 .remap_pages = generic_file_remap_pages,
101}; 103};
102 104
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) 105static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
@@ -137,6 +139,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
137 if (ret) 139 if (ret)
138 return ret; 140 return ret;
139 141
142 /* guarantee free sections for fsync */
143 f2fs_balance_fs(sbi);
144
140 mutex_lock(&inode->i_mutex); 145 mutex_lock(&inode->i_mutex);
141 146
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 147 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
@@ -153,22 +158,24 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
153 158
154 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 159 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
155 need_cp = true; 160 need_cp = true;
156 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) 161 else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
157 need_cp = true; 162 need_cp = true;
158 if (!space_for_roll_forward(sbi)) 163 else if (!space_for_roll_forward(sbi))
159 need_cp = true; 164 need_cp = true;
160 if (need_to_sync_dir(sbi, inode)) 165 else if (need_to_sync_dir(sbi, inode))
161 need_cp = true; 166 need_cp = true;
162 167
163 f2fs_write_inode(inode, NULL);
164
165 if (need_cp) { 168 if (need_cp) {
166 /* all the dirty node pages should be flushed for POR */ 169 /* all the dirty node pages should be flushed for POR */
167 ret = f2fs_sync_fs(inode->i_sb, 1); 170 ret = f2fs_sync_fs(inode->i_sb, 1);
168 clear_inode_flag(F2FS_I(inode), FI_NEED_CP); 171 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
169 } else { 172 } else {
170 while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) 173 /* if there is no written node page, write its inode page */
171 f2fs_write_inode(inode, NULL); 174 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
175 ret = f2fs_write_inode(inode, NULL);
176 if (ret)
177 goto out;
178 }
172 filemap_fdatawait_range(sbi->node_inode->i_mapping, 179 filemap_fdatawait_range(sbi->node_inode->i_mapping,
173 0, LONG_MAX); 180 0, LONG_MAX);
174 } 181 }
@@ -292,8 +299,6 @@ void f2fs_truncate(struct inode *inode)
292 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 299 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
293 mark_inode_dirty(inode); 300 mark_inode_dirty(inode);
294 } 301 }
295
296 f2fs_balance_fs(F2FS_SB(inode->i_sb));
297} 302}
298 303
299static int f2fs_getattr(struct vfsmount *mnt, 304static int f2fs_getattr(struct vfsmount *mnt,
@@ -350,6 +355,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
350 attr->ia_size != i_size_read(inode)) { 355 attr->ia_size != i_size_read(inode)) {
351 truncate_setsize(inode, attr->ia_size); 356 truncate_setsize(inode, attr->ia_size);
352 f2fs_truncate(inode); 357 f2fs_truncate(inode);
358 f2fs_balance_fs(F2FS_SB(inode->i_sb));
353 } 359 }
354 360
355 __setattr_copy(inode, attr); 361 __setattr_copy(inode, attr);
@@ -381,12 +387,17 @@ const struct inode_operations f2fs_file_inode_operations = {
381static void fill_zero(struct inode *inode, pgoff_t index, 387static void fill_zero(struct inode *inode, pgoff_t index,
382 loff_t start, loff_t len) 388 loff_t start, loff_t len)
383{ 389{
390 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
384 struct page *page; 391 struct page *page;
385 392
386 if (!len) 393 if (!len)
387 return; 394 return;
388 395
396 f2fs_balance_fs(sbi);
397
398 mutex_lock_op(sbi, DATA_NEW);
389 page = get_new_data_page(inode, index, false); 399 page = get_new_data_page(inode, index, false);
400 mutex_unlock_op(sbi, DATA_NEW);
390 401
391 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
392 wait_on_page_writeback(page); 403 wait_on_page_writeback(page);
@@ -405,6 +416,8 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
405 struct dnode_of_data dn; 416 struct dnode_of_data dn;
406 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 417 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
407 418
419 f2fs_balance_fs(sbi);
420
408 mutex_lock_op(sbi, DATA_TRUNC); 421 mutex_lock_op(sbi, DATA_TRUNC);
409 set_new_dnode(&dn, inode, NULL, NULL, 0); 422 set_new_dnode(&dn, inode, NULL, NULL, 0);
410 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 423 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
@@ -532,7 +545,6 @@ static long f2fs_fallocate(struct file *file, int mode,
532 loff_t offset, loff_t len) 545 loff_t offset, loff_t len)
533{ 546{
534 struct inode *inode = file->f_path.dentry->d_inode; 547 struct inode *inode = file->f_path.dentry->d_inode;
535 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
536 long ret; 548 long ret;
537 549
538 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 550 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -543,7 +555,10 @@ static long f2fs_fallocate(struct file *file, int mode,
543 else 555 else
544 ret = expand_inode_data(inode, offset, len, mode); 556 ret = expand_inode_data(inode, offset, len, mode);
545 557
546 f2fs_balance_fs(sbi); 558 if (!ret) {
559 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
560 mark_inode_dirty(inode);
561 }
547 return ret; 562 return ret;
548} 563}
549 564
@@ -620,6 +635,23 @@ out:
620 } 635 }
621} 636}
622 637
638#ifdef CONFIG_COMPAT
639long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
640{
641 switch (cmd) {
642 case F2FS_IOC32_GETFLAGS:
643 cmd = F2FS_IOC_GETFLAGS;
644 break;
645 case F2FS_IOC32_SETFLAGS:
646 cmd = F2FS_IOC_SETFLAGS;
647 break;
648 default:
649 return -ENOIOCTLCMD;
650 }
651 return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
652}
653#endif
654
623const struct file_operations f2fs_file_operations = { 655const struct file_operations f2fs_file_operations = {
624 .llseek = generic_file_llseek, 656 .llseek = generic_file_llseek,
625 .read = do_sync_read, 657 .read = do_sync_read,
@@ -631,6 +663,9 @@ const struct file_operations f2fs_file_operations = {
631 .fsync = f2fs_sync_file, 663 .fsync = f2fs_sync_file,
632 .fallocate = f2fs_fallocate, 664 .fallocate = f2fs_fallocate,
633 .unlocked_ioctl = f2fs_ioctl, 665 .unlocked_ioctl = f2fs_ioctl,
666#ifdef CONFIG_COMPAT
667 .compat_ioctl = f2fs_compat_ioctl,
668#endif
634 .splice_read = generic_file_splice_read, 669 .splice_read = generic_file_splice_read,
635 .splice_write = generic_file_splice_write, 670 .splice_write = generic_file_splice_write,
636}; 671};
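The new f2fs_compat_ioctl() above follows the usual compat pattern: translate the 32-bit ioctl command to its native counterpart, convert the user pointer with compat_ptr(), and reject anything else with -ENOIOCTLCMD. A self-contained sketch of that shape, with made-up command values and a stub in place of the real handler:

    #include <stdio.h>

    #define ENOIOCTLCMD 515                 /* kernel-internal "not handled" code */

    #define NATIVE_GETFLAGS 0x01
    #define NATIVE_SETFLAGS 0x02
    #define COMPAT_GETFLAGS 0x81            /* what a 32-bit caller would pass */
    #define COMPAT_SETFLAGS 0x82

    static long native_ioctl(unsigned int cmd, unsigned long arg)
    {
        printf("native ioctl: cmd=%#x arg=%#lx\n", cmd, arg);
        return 0;
    }

    static long compat_ioctl(unsigned int cmd, unsigned long arg)
    {
        switch (cmd) {
        case COMPAT_GETFLAGS:
            cmd = NATIVE_GETFLAGS;
            break;
        case COMPAT_SETFLAGS:
            cmd = NATIVE_SETFLAGS;
            break;
        default:
            /* unknown command: let the caller report an error */
            return -ENOIOCTLCMD;
        }
        /* in the kernel, arg would also go through compat_ptr() here */
        return native_ioctl(cmd, arg);
    }

    int main(void)
    {
        compat_ioctl(COMPAT_GETFLAGS, 0x1000);
        return compat_ioctl(0xff, 0) == -ENOIOCTLCMD ? 0 : 1;
    }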
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 644aa3808273..94b8a0c48453 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -44,10 +44,10 @@ static int gc_thread_func(void *data)
44 if (kthread_should_stop()) 44 if (kthread_should_stop())
45 break; 45 break;
46 46
47 f2fs_balance_fs(sbi); 47 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
48 48 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
49 if (!test_opt(sbi, BG_GC))
50 continue; 49 continue;
50 }
51 51
52 /* 52 /*
53 * [GC triggering condition] 53 * [GC triggering condition]
@@ -78,7 +78,8 @@ static int gc_thread_func(void *data)
78 78
79 sbi->bg_gc++; 79 sbi->bg_gc++;
80 80
81 if (f2fs_gc(sbi, 1) == GC_NONE) 81 /* if return value is not zero, no victim was selected */
82 if (f2fs_gc(sbi))
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME; 83 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) 84 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME; 85 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -90,7 +91,10 @@ static int gc_thread_func(void *data)
90int start_gc_thread(struct f2fs_sb_info *sbi) 91int start_gc_thread(struct f2fs_sb_info *sbi)
91{ 92{
92 struct f2fs_gc_kthread *gc_th; 93 struct f2fs_gc_kthread *gc_th;
94 dev_t dev = sbi->sb->s_bdev->bd_dev;
93 95
96 if (!test_opt(sbi, BG_GC))
97 return 0;
94 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 98 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
95 if (!gc_th) 99 if (!gc_th)
96 return -ENOMEM; 100 return -ENOMEM;
@@ -98,9 +102,10 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
98 sbi->gc_thread = gc_th; 102 sbi->gc_thread = gc_th;
99 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); 103 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
100 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, 104 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
101 GC_THREAD_NAME); 105 "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
102 if (IS_ERR(gc_th->f2fs_gc_task)) { 106 if (IS_ERR(gc_th->f2fs_gc_task)) {
103 kfree(gc_th); 107 kfree(gc_th);
108 sbi->gc_thread = NULL;
104 return -ENOMEM; 109 return -ENOMEM;
105 } 110 }
106 return 0; 111 return 0;
@@ -141,6 +146,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
141static unsigned int get_max_cost(struct f2fs_sb_info *sbi, 146static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
142 struct victim_sel_policy *p) 147 struct victim_sel_policy *p)
143{ 148{
149 /* SSR allocates in a segment unit */
150 if (p->alloc_mode == SSR)
151 return 1 << sbi->log_blocks_per_seg;
144 if (p->gc_mode == GC_GREEDY) 152 if (p->gc_mode == GC_GREEDY)
145 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; 153 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
146 else if (p->gc_mode == GC_CB) 154 else if (p->gc_mode == GC_CB)
@@ -356,7 +364,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
356 sentry = get_seg_entry(sbi, segno); 364 sentry = get_seg_entry(sbi, segno);
357 ret = f2fs_test_bit(offset, sentry->cur_valid_map); 365 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
358 mutex_unlock(&sit_i->sentry_lock); 366 mutex_unlock(&sit_i->sentry_lock);
359 return ret ? GC_OK : GC_NEXT; 367 return ret;
360} 368}
361 369
362/* 370/*
@@ -364,7 +372,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
364 * On validity, copy that node with cold status, otherwise (invalid node) 372 * On validity, copy that node with cold status, otherwise (invalid node)
365 * ignore that. 373 * ignore that.
366 */ 374 */
367static int gc_node_segment(struct f2fs_sb_info *sbi, 375static void gc_node_segment(struct f2fs_sb_info *sbi,
368 struct f2fs_summary *sum, unsigned int segno, int gc_type) 376 struct f2fs_summary *sum, unsigned int segno, int gc_type)
369{ 377{
370 bool initial = true; 378 bool initial = true;
@@ -376,23 +384,12 @@ next_step:
376 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 384 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
377 nid_t nid = le32_to_cpu(entry->nid); 385 nid_t nid = le32_to_cpu(entry->nid);
378 struct page *node_page; 386 struct page *node_page;
379 int err;
380 387
381 /* 388 /* stop BG_GC if there are not enough free sections. */
382 * It makes sure that free segments are able to write 389 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
383 * all the dirty node pages before CP after this CP. 390 return;
384 * So let's check the space of dirty node pages.
385 */
386 if (should_do_checkpoint(sbi)) {
387 mutex_lock(&sbi->cp_mutex);
388 block_operations(sbi);
389 return GC_BLOCKED;
390 }
391 391
392 err = check_valid_map(sbi, segno, off); 392 if (check_valid_map(sbi, segno, off) == 0)
393 if (err == GC_ERROR)
394 return err;
395 else if (err == GC_NEXT)
396 continue; 393 continue;
397 394
398 if (initial) { 395 if (initial) {
@@ -422,36 +419,33 @@ next_step:
422 }; 419 };
423 sync_node_pages(sbi, 0, &wbc); 420 sync_node_pages(sbi, 0, &wbc);
424 } 421 }
425 return GC_DONE;
426} 422}
427 423
428/* 424/*
429 * Calculate start block index that this node page contains 425 * Calculate start block index indicating the given node offset.
426 * Be careful: the caller must pass a node offset that refers to a direct
427 * node block. Passing an offset that points to any other type of node
428 * block, such as an indirect or double indirect node block, is a bug in
429 * the caller.
430 */ 430 */
431block_t start_bidx_of_node(unsigned int node_ofs) 431block_t start_bidx_of_node(unsigned int node_ofs)
432{ 432{
433 block_t start_bidx; 433 unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
434 unsigned int bidx, indirect_blks; 434 unsigned int bidx;
435 int dec;
436 435
437 indirect_blks = 2 * NIDS_PER_BLOCK + 4; 436 if (node_ofs == 0)
437 return 0;
438 438
439 start_bidx = 1; 439 if (node_ofs <= 2) {
440 if (node_ofs == 0) {
441 start_bidx = 0;
442 } else if (node_ofs <= 2) {
443 bidx = node_ofs - 1; 440 bidx = node_ofs - 1;
444 } else if (node_ofs <= indirect_blks) { 441 } else if (node_ofs <= indirect_blks) {
445 dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); 442 int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
446 bidx = node_ofs - 2 - dec; 443 bidx = node_ofs - 2 - dec;
447 } else { 444 } else {
448 dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); 445 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
449 bidx = node_ofs - 5 - dec; 446 bidx = node_ofs - 5 - dec;
450 } 447 }
451 448 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
452 if (start_bidx)
453 start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
454 return start_bidx;
455} 449}
456 450
457static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 451static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
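The rewritten start_bidx_of_node() above drops the start_bidx bookkeeping variable and returns early for the inode (offset 0). A standalone sketch of the arithmetic; the three constants are placeholders for the layout values defined in f2fs_fs.h, and the dec term accounts for indirect node blocks, which carry no data addresses of their own:

    #include <stdio.h>

    #define NIDS_PER_BLOCK  1018U   /* placeholder */
    #define ADDRS_PER_BLOCK 1018U   /* placeholder */
    #define ADDRS_PER_INODE  923U   /* placeholder */

    /* only valid for offsets of the inode or of direct node blocks */
    static unsigned int start_bidx_of_node(unsigned int node_ofs)
    {
        unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
        unsigned int bidx;

        if (node_ofs == 0)
            return 0;

        if (node_ofs <= 2) {
            bidx = node_ofs - 1;
        } else if (node_ofs <= indirect_blks) {
            unsigned int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
            bidx = node_ofs - 2 - dec;
        } else {
            unsigned int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
            bidx = node_ofs - 5 - dec;
        }
        return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
    }

    int main(void)
    {
        unsigned int ofs[] = { 0, 1, 2, 4, 5, NIDS_PER_BLOCK + 3 };

        for (unsigned int i = 0; i < sizeof(ofs) / sizeof(ofs[0]); i++)
            printf("node_ofs %u -> start_bidx %u\n",
                   ofs[i], start_bidx_of_node(ofs[i]));
        return 0;
    }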
@@ -467,13 +461,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
467 461
468 node_page = get_node_page(sbi, nid); 462 node_page = get_node_page(sbi, nid);
469 if (IS_ERR(node_page)) 463 if (IS_ERR(node_page))
470 return GC_NEXT; 464 return 0;
471 465
472 get_node_info(sbi, nid, dni); 466 get_node_info(sbi, nid, dni);
473 467
474 if (sum->version != dni->version) { 468 if (sum->version != dni->version) {
475 f2fs_put_page(node_page, 1); 469 f2fs_put_page(node_page, 1);
476 return GC_NEXT; 470 return 0;
477 } 471 }
478 472
479 *nofs = ofs_of_node(node_page); 473 *nofs = ofs_of_node(node_page);
@@ -481,8 +475,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
481 f2fs_put_page(node_page, 1); 475 f2fs_put_page(node_page, 1);
482 476
483 if (source_blkaddr != blkaddr) 477 if (source_blkaddr != blkaddr)
484 return GC_NEXT; 478 return 0;
485 return GC_OK; 479 return 1;
486} 480}
487 481
488static void move_data_page(struct inode *inode, struct page *page, int gc_type) 482static void move_data_page(struct inode *inode, struct page *page, int gc_type)
@@ -523,13 +517,13 @@ out:
523 * If the parent node is not valid or the data block address is different, 517 * If the parent node is not valid or the data block address is different,
524 * the victim data block is ignored. 518 * the victim data block is ignored.
525 */ 519 */
526static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 520static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
527 struct list_head *ilist, unsigned int segno, int gc_type) 521 struct list_head *ilist, unsigned int segno, int gc_type)
528{ 522{
529 struct super_block *sb = sbi->sb; 523 struct super_block *sb = sbi->sb;
530 struct f2fs_summary *entry; 524 struct f2fs_summary *entry;
531 block_t start_addr; 525 block_t start_addr;
532 int err, off; 526 int off;
533 int phase = 0; 527 int phase = 0;
534 528
535 start_addr = START_BLOCK(sbi, segno); 529 start_addr = START_BLOCK(sbi, segno);
@@ -543,22 +537,11 @@ next_step:
543 unsigned int ofs_in_node, nofs; 537 unsigned int ofs_in_node, nofs;
544 block_t start_bidx; 538 block_t start_bidx;
545 539
546 /* 540 /* stop BG_GC if there are not enough free sections. */
547 * It makes sure that free segments are able to write 541 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
548 * all the dirty node pages before CP after this CP. 542 return;
549 * So let's check the space of dirty node pages.
550 */
551 if (should_do_checkpoint(sbi)) {
552 mutex_lock(&sbi->cp_mutex);
553 block_operations(sbi);
554 err = GC_BLOCKED;
555 goto stop;
556 }
557 543
558 err = check_valid_map(sbi, segno, off); 544 if (check_valid_map(sbi, segno, off) == 0)
559 if (err == GC_ERROR)
560 goto stop;
561 else if (err == GC_NEXT)
562 continue; 545 continue;
563 546
564 if (phase == 0) { 547 if (phase == 0) {
@@ -567,10 +550,7 @@ next_step:
567 } 550 }
568 551
569 /* Get an inode by ino with checking validity */ 552 /* Get an inode by ino with checking validity */
570 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); 553 if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0)
571 if (err == GC_ERROR)
572 goto stop;
573 else if (err == GC_NEXT)
574 continue; 554 continue;
575 555
576 if (phase == 1) { 556 if (phase == 1) {
@@ -582,7 +562,7 @@ next_step:
582 ofs_in_node = le16_to_cpu(entry->ofs_in_node); 562 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
583 563
584 if (phase == 2) { 564 if (phase == 2) {
585 inode = f2fs_iget_nowait(sb, dni.ino); 565 inode = f2fs_iget(sb, dni.ino);
586 if (IS_ERR(inode)) 566 if (IS_ERR(inode))
587 continue; 567 continue;
588 568
@@ -610,11 +590,9 @@ next_iput:
610 } 590 }
611 if (++phase < 4) 591 if (++phase < 4)
612 goto next_step; 592 goto next_step;
613 err = GC_DONE; 593
614stop:
615 if (gc_type == FG_GC) 594 if (gc_type == FG_GC)
616 f2fs_submit_bio(sbi, DATA, true); 595 f2fs_submit_bio(sbi, DATA, true);
617 return err;
618} 596}
619 597
620static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 598static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -628,17 +606,16 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
628 return ret; 606 return ret;
629} 607}
630 608
631static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 609static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
632 struct list_head *ilist, int gc_type) 610 struct list_head *ilist, int gc_type)
633{ 611{
634 struct page *sum_page; 612 struct page *sum_page;
635 struct f2fs_summary_block *sum; 613 struct f2fs_summary_block *sum;
636 int ret = GC_DONE;
637 614
638 /* read segment summary of victim */ 615 /* read segment summary of victim */
639 sum_page = get_sum_page(sbi, segno); 616 sum_page = get_sum_page(sbi, segno);
640 if (IS_ERR(sum_page)) 617 if (IS_ERR(sum_page))
641 return GC_ERROR; 618 return;
642 619
643 /* 620 /*
644 * CP needs to lock sum_page. In this time, we don't need 621 * CP needs to lock sum_page. In this time, we don't need
@@ -650,76 +627,55 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
650 627
651 switch (GET_SUM_TYPE((&sum->footer))) { 628 switch (GET_SUM_TYPE((&sum->footer))) {
652 case SUM_TYPE_NODE: 629 case SUM_TYPE_NODE:
653 ret = gc_node_segment(sbi, sum->entries, segno, gc_type); 630 gc_node_segment(sbi, sum->entries, segno, gc_type);
654 break; 631 break;
655 case SUM_TYPE_DATA: 632 case SUM_TYPE_DATA:
656 ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); 633 gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
657 break; 634 break;
658 } 635 }
659 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 636 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
660 stat_inc_call_count(sbi->stat_info); 637 stat_inc_call_count(sbi->stat_info);
661 638
662 f2fs_put_page(sum_page, 0); 639 f2fs_put_page(sum_page, 0);
663 return ret;
664} 640}
665 641
666int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) 642int f2fs_gc(struct f2fs_sb_info *sbi)
667{ 643{
668 unsigned int segno;
669 int old_free_secs, cur_free_secs;
670 int gc_status, nfree;
671 struct list_head ilist; 644 struct list_head ilist;
645 unsigned int segno, i;
672 int gc_type = BG_GC; 646 int gc_type = BG_GC;
647 int nfree = 0;
648 int ret = -1;
673 649
674 INIT_LIST_HEAD(&ilist); 650 INIT_LIST_HEAD(&ilist);
675gc_more: 651gc_more:
676 nfree = 0; 652 if (!(sbi->sb->s_flags & MS_ACTIVE))
677 gc_status = GC_NONE; 653 goto stop;
678 654
679 if (has_not_enough_free_secs(sbi)) 655 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree))
680 old_free_secs = reserved_sections(sbi); 656 gc_type = FG_GC;
681 else
682 old_free_secs = free_sections(sbi);
683 657
684 while (sbi->sb->s_flags & MS_ACTIVE) { 658 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
685 int i; 659 goto stop;
686 if (has_not_enough_free_secs(sbi)) 660 ret = 0;
687 gc_type = FG_GC;
688 661
689 cur_free_secs = free_sections(sbi) + nfree; 662 for (i = 0; i < sbi->segs_per_sec; i++)
663 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
690 664
691 /* We got free space successfully. */ 665 if (gc_type == FG_GC &&
692 if (nGC < cur_free_secs - old_free_secs) 666 get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
693 break; 667 nfree++;
694 668
695 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 669 if (has_not_enough_free_secs(sbi, nfree))
696 break; 670 goto gc_more;
697 671
698 for (i = 0; i < sbi->segs_per_sec; i++) { 672 if (gc_type == FG_GC)
699 /* 673 write_checkpoint(sbi, false);
700 * do_garbage_collect will give us three gc_status:
701 * GC_ERROR, GC_DONE, and GC_BLOCKED.
702 * If GC is finished uncleanly, we have to return
703 * the victim to dirty segment list.
704 */
705 gc_status = do_garbage_collect(sbi, segno + i,
706 &ilist, gc_type);
707 if (gc_status != GC_DONE)
708 goto stop;
709 nfree++;
710 }
711 }
712stop: 674stop:
713 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
714 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
715 if (nfree)
716 goto gc_more;
717 }
718 mutex_unlock(&sbi->gc_mutex); 675 mutex_unlock(&sbi->gc_mutex);
719 676
720 put_gc_inode(&ilist); 677 put_gc_inode(&ilist);
721 BUG_ON(!list_empty(&ilist)); 678 return ret;
722 return gc_status;
723} 679}
724 680
725void build_gc_manager(struct f2fs_sb_info *sbi) 681void build_gc_manager(struct f2fs_sb_info *sbi)
@@ -727,7 +683,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
727 DIRTY_I(sbi)->v_ops = &default_v_ops; 683 DIRTY_I(sbi)->v_ops = &default_v_ops;
728} 684}
729 685
730int create_gc_caches(void) 686int __init create_gc_caches(void)
731{ 687{
732 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 688 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
733 sizeof(struct inode_entry), NULL); 689 sizeof(struct inode_entry), NULL);
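The rewritten f2fs_gc() above no longer tracks free-section deltas or the old GC_* status codes; it escalates from background to foreground GC when free sections run short, counts the sections it has emptied, loops until the shortage is resolved, and writes a checkpoint for foreground GC. A toy model of that control flow with victim selection and collection stubbed out (one segment per section assumed; every name here is a stand-in):

    #include <stdbool.h>
    #include <stdio.h>

    enum { BG_GC, FG_GC };

    static int free_secs = 1;               /* pretend we are almost full */
    static const int reserved_secs = 3;

    static bool has_not_enough_free_secs(int freed)
    {
        return free_secs + freed < reserved_secs;
    }

    static bool get_victim(unsigned int *segno)
    {
        static unsigned int next;

        *segno = next++;                    /* always finds a victim here */
        return true;
    }

    static bool do_garbage_collect(unsigned int segno, int gc_type)
    {
        printf("GC %s: segment %u\n", gc_type == FG_GC ? "FG" : "BG", segno);
        return gc_type == FG_GC;            /* foreground GC empties the section */
    }

    int main(void)
    {
        int gc_type = BG_GC, nfree = 0;
        unsigned int segno;

    gc_more:
        if (gc_type == BG_GC && has_not_enough_free_secs(nfree))
            gc_type = FG_GC;
        if (!get_victim(&segno))
            goto stop;
        if (do_garbage_collect(segno, gc_type))
            nfree++;
        if (has_not_enough_free_secs(nfree))
            goto gc_more;
        if (gc_type == FG_GC)
            printf("write_checkpoint()\n");
    stop:
        printf("freed %d section(s)\n", nfree);
        return 0;
    }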
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b026d9354ccd..30b2db003acd 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -8,7 +8,6 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#define GC_THREAD_NAME "f2fs_gc_task"
12#define GC_THREAD_MIN_WB_PAGES 1 /* 11#define GC_THREAD_MIN_WB_PAGES 1 /*
13 * a threshold to determine 12 * a threshold to determine
14 * whether IO subsystem is idle 13 * whether IO subsystem is idle
@@ -23,15 +22,6 @@
23/* Search max. number of dirty segments to select a victim segment */ 22/* Search max. number of dirty segments to select a victim segment */
24#define MAX_VICTIM_SEARCH 20 23#define MAX_VICTIM_SEARCH 20
25 24
26enum {
27 GC_NONE = 0,
28 GC_ERROR,
29 GC_OK,
30 GC_NEXT,
31 GC_BLOCKED,
32 GC_DONE,
33};
34
35struct f2fs_gc_kthread { 25struct f2fs_gc_kthread {
36 struct task_struct *f2fs_gc_task; 26 struct task_struct *f2fs_gc_task;
37 wait_queue_head_t gc_wait_queue_head; 27 wait_queue_head_t gc_wait_queue_head;
@@ -104,14 +94,3 @@ static inline int is_idle(struct f2fs_sb_info *sbi)
104 struct request_list *rl = &q->root_rl; 94 struct request_list *rl = &q->root_rl;
105 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); 95 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
106} 96}
107
108static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
109{
110 unsigned int pages_per_sec = sbi->segs_per_sec *
111 (1 << sbi->log_blocks_per_seg);
112 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
113 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
114 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
115 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
116 return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
117}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index a60f04200f8b..6eb8d269b53b 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,7 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
42 buf[1] += b1; 42 buf[1] += b1;
43} 43}
44 44
45static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) 45static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
46{ 46{
47 unsigned pad, val; 47 unsigned pad, val;
48 int i; 48 int i;
@@ -69,13 +69,17 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, int len) 72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
73{ 73{
74 __u32 hash, minor_hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 78
79 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0'))
81 return 0;
82
79 /* Initialize the default seed for the hash checksum functions */ 83 /* Initialize the default seed for the hash checksum functions */
80 buf[0] = 0x67452301; 84 buf[0] = 0x67452301;
81 buf[1] = 0xefcdab89; 85 buf[1] = 0xefcdab89;
@@ -83,15 +87,15 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
83 buf[3] = 0x10325476; 87 buf[3] = 0x10325476;
84 88
85 p = name; 89 p = name;
86 while (len > 0) { 90 while (1) {
87 str2hashbuf(p, len, in, 4); 91 str2hashbuf(p, len, in, 4);
88 TEA_transform(buf, in); 92 TEA_transform(buf, in);
89 len -= 16;
90 p += 16; 93 p += 16;
94 if (len <= 16)
95 break;
96 len -= 16;
91 } 97 }
92 hash = buf[0]; 98 hash = buf[0];
93 minor_hash = buf[1];
94
95 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); 99 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
96 return f2fs_hash; 100 return f2fs_hash;
97} 101}
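The reworked f2fs_dentry_hash() loop above always folds in at least one 16-byte chunk and stops after the final, possibly short, chunk; "." and ".." now short-circuit to hash 0. A sketch of that chunking with the TEA transform replaced by a trivial stand-in mixer (the padding and mixing below do not match the real str2hashbuf()/TEA_transform()):

    #include <stdio.h>
    #include <string.h>

    static unsigned int mix(unsigned int h, const char *p, size_t n)
    {
        /* stand-in for str2hashbuf() + TEA_transform() */
        for (size_t i = 0; i < 16; i++)
            h = h * 31 + (i < n ? (unsigned char)p[i] : 0);
        return h;
    }

    static unsigned int toy_dentry_hash(const char *name, size_t len)
    {
        unsigned int hash = 0x67452301;
        const char *p = name;

        if (len <= 2 && name[0] == '.' &&
            (name[1] == '.' || name[1] == '\0'))
            return 0;

        while (1) {
            hash = mix(hash, p, len > 16 ? 16 : len);
            p += 16;
            if (len <= 16)
                break;
            len -= 16;
        }
        return hash;
    }

    int main(void)
    {
        const char *names[] = { ".", "..", "a", "a_name_longer_than_sixteen_bytes" };

        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
            printf("%-35s -> %#x\n", names[i],
                   toy_dentry_hash(names[i], strlen(names[i])));
        return 0;
    }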
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index df5fb381ebf1..ddae412d30c8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,11 +16,6 @@
16#include "f2fs.h" 16#include "f2fs.h"
17#include "node.h" 17#include "node.h"
18 18
19struct f2fs_iget_args {
20 u64 ino;
21 int on_free;
22};
23
24void f2fs_set_inode_flags(struct inode *inode) 19void f2fs_set_inode_flags(struct inode *inode)
25{ 20{
26 unsigned int flags = F2FS_I(inode)->i_flags; 21 unsigned int flags = F2FS_I(inode)->i_flags;
@@ -40,34 +35,6 @@ void f2fs_set_inode_flags(struct inode *inode)
40 inode->i_flags |= S_DIRSYNC; 35 inode->i_flags |= S_DIRSYNC;
41} 36}
42 37
43static int f2fs_iget_test(struct inode *inode, void *data)
44{
45 struct f2fs_iget_args *args = data;
46
47 if (inode->i_ino != args->ino)
48 return 0;
49 if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
50 args->on_free = 1;
51 return 0;
52 }
53 return 1;
54}
55
56struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
57{
58 struct f2fs_iget_args args = {
59 .ino = ino,
60 .on_free = 0
61 };
62 struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
63
64 if (inode)
65 return inode;
66 if (!args.on_free)
67 return f2fs_iget(sb, ino);
68 return ERR_PTR(-ENOENT);
69}
70
71static int do_read_inode(struct inode *inode) 38static int do_read_inode(struct inode *inode)
72{ 39{
73 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 40 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -100,6 +67,10 @@ static int do_read_inode(struct inode *inode)
100 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); 67 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
101 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); 68 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
102 inode->i_generation = le32_to_cpu(ri->i_generation); 69 inode->i_generation = le32_to_cpu(ri->i_generation);
70 if (ri->i_addr[0])
71 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
72 else
73 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
103 74
104 fi->i_current_depth = le32_to_cpu(ri->i_current_depth); 75 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
105 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); 76 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -203,6 +174,21 @@ void update_inode(struct inode *inode, struct page *node_page)
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 174 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 175 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation); 176 ri->i_generation = cpu_to_le32(inode->i_generation);
177
178 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
179 if (old_valid_dev(inode->i_rdev)) {
180 ri->i_addr[0] =
181 cpu_to_le32(old_encode_dev(inode->i_rdev));
182 ri->i_addr[1] = 0;
183 } else {
184 ri->i_addr[0] = 0;
185 ri->i_addr[1] =
186 cpu_to_le32(new_encode_dev(inode->i_rdev));
187 ri->i_addr[2] = 0;
188 }
189 }
190
191 set_cold_node(inode, node_page);
206 set_page_dirty(node_page); 192 set_page_dirty(node_page);
207} 193}
208 194
@@ -216,6 +202,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
216 inode->i_ino == F2FS_META_INO(sbi)) 202 inode->i_ino == F2FS_META_INO(sbi))
217 return 0; 203 return 0;
218 204
205 if (wbc)
206 f2fs_balance_fs(sbi);
207
219 node_page = get_node_page(sbi, inode->i_ino); 208 node_page = get_node_page(sbi, inode->i_ino);
220 if (IS_ERR(node_page)) 209 if (IS_ERR(node_page))
221 return PTR_ERR(node_page); 210 return PTR_ERR(node_page);
@@ -256,6 +245,7 @@ void f2fs_evict_inode(struct inode *inode)
256 if (inode->i_nlink || is_bad_inode(inode)) 245 if (inode->i_nlink || is_bad_inode(inode))
257 goto no_delete; 246 goto no_delete;
258 247
248 sb_start_intwrite(inode->i_sb);
259 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); 249 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
260 i_size_write(inode, 0); 250 i_size_write(inode, 0);
261 251
@@ -263,6 +253,7 @@ void f2fs_evict_inode(struct inode *inode)
263 f2fs_truncate(inode); 253 f2fs_truncate(inode);
264 254
265 remove_inode_page(inode); 255 remove_inode_page(inode);
256 sb_end_intwrite(inode->i_sb);
266no_delete: 257no_delete:
267 clear_inode(inode); 258 clear_inode(inode);
268} 259}
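The i_addr[0]/i_addr[1] handling added to do_read_inode() and update_inode() above mirrors the kernel's kdev_t helpers: device numbers that fit the historical 16-bit "major<<8 | minor" form go into slot 0 via old_encode_dev(), larger ones use new_encode_dev() in slot 1. A userspace replica of those helpers for illustration (believed to match include/linux/kdev_t.h of that era):

    #include <stdio.h>

    typedef unsigned int kdev;          /* stand-in for the kernel's dev_t */

    #define MINORBITS  20
    #define MINORMASK  ((1U << MINORBITS) - 1)
    #define MAJOR(dev) ((unsigned int)((dev) >> MINORBITS))
    #define MINOR(dev) ((unsigned int)((dev) & MINORMASK))
    #define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi))

    static int old_valid_dev(kdev dev)
    {
        return MAJOR(dev) < 256 && MINOR(dev) < 256;
    }

    static unsigned int old_encode_dev(kdev dev)
    {
        return (MAJOR(dev) << 8) | MINOR(dev);
    }

    static unsigned int new_encode_dev(kdev dev)
    {
        unsigned int major = MAJOR(dev), minor = MINOR(dev);

        return (minor & 0xff) | (major << 8) | ((minor & ~0xffU) << 12);
    }

    int main(void)
    {
        kdev small = MKDEV(8, 1);       /* e.g. /dev/sda1 */
        kdev large = MKDEV(259, 70000); /* needs the new encoding */

        printf("small: old_valid=%d old=%#x\n",
               old_valid_dev(small), old_encode_dev(small));
        printf("large: old_valid=%d new=%#x\n",
               old_valid_dev(large), new_encode_dev(large));
        return 0;
    }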
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 89b7675dc377..1a49b881bac0 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -77,8 +77,8 @@ fail:
77 77
78static int is_multimedia_file(const unsigned char *s, const char *sub) 78static int is_multimedia_file(const unsigned char *s, const char *sub)
79{ 79{
80 int slen = strlen(s); 80 size_t slen = strlen(s);
81 int sublen = strlen(sub); 81 size_t sublen = strlen(sub);
82 int ret; 82 int ret;
83 83
84 if (sublen > slen) 84 if (sublen > slen)
@@ -123,6 +123,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
123 nid_t ino = 0; 123 nid_t ino = 0;
124 int err; 124 int err;
125 125
126 f2fs_balance_fs(sbi);
127
126 inode = f2fs_new_inode(dir, mode); 128 inode = f2fs_new_inode(dir, mode);
127 if (IS_ERR(inode)) 129 if (IS_ERR(inode))
128 return PTR_ERR(inode); 130 return PTR_ERR(inode);
@@ -144,8 +146,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
144 if (!sbi->por_doing) 146 if (!sbi->por_doing)
145 d_instantiate(dentry, inode); 147 d_instantiate(dentry, inode);
146 unlock_new_inode(inode); 148 unlock_new_inode(inode);
147
148 f2fs_balance_fs(sbi);
149 return 0; 149 return 0;
150out: 150out:
151 clear_nlink(inode); 151 clear_nlink(inode);
@@ -163,6 +163,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
163 struct f2fs_sb_info *sbi = F2FS_SB(sb); 163 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err; 164 int err;
165 165
166 f2fs_balance_fs(sbi);
167
166 inode->i_ctime = CURRENT_TIME; 168 inode->i_ctime = CURRENT_TIME;
167 atomic_inc(&inode->i_count); 169 atomic_inc(&inode->i_count);
168 170
@@ -172,8 +174,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
172 goto out; 174 goto out;
173 175
174 d_instantiate(dentry, inode); 176 d_instantiate(dentry, inode);
175
176 f2fs_balance_fs(sbi);
177 return 0; 177 return 0;
178out: 178out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -223,6 +223,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
223 struct page *page; 223 struct page *page;
224 int err = -ENOENT; 224 int err = -ENOENT;
225 225
226 f2fs_balance_fs(sbi);
227
226 de = f2fs_find_entry(dir, &dentry->d_name, &page); 228 de = f2fs_find_entry(dir, &dentry->d_name, &page);
227 if (!de) 229 if (!de)
228 goto fail; 230 goto fail;
@@ -238,7 +240,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
238 240
239 /* In order to evict this inode, we set it dirty */ 241 /* In order to evict this inode, we set it dirty */
240 mark_inode_dirty(inode); 242 mark_inode_dirty(inode);
241 f2fs_balance_fs(sbi);
242fail: 243fail:
243 return err; 244 return err;
244} 245}
@@ -249,9 +250,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
249 struct super_block *sb = dir->i_sb; 250 struct super_block *sb = dir->i_sb;
250 struct f2fs_sb_info *sbi = F2FS_SB(sb); 251 struct f2fs_sb_info *sbi = F2FS_SB(sb);
251 struct inode *inode; 252 struct inode *inode;
252 unsigned symlen = strlen(symname) + 1; 253 size_t symlen = strlen(symname) + 1;
253 int err; 254 int err;
254 255
256 f2fs_balance_fs(sbi);
257
255 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); 258 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
256 if (IS_ERR(inode)) 259 if (IS_ERR(inode))
257 return PTR_ERR(inode); 260 return PTR_ERR(inode);
@@ -268,9 +271,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
268 271
269 d_instantiate(dentry, inode); 272 d_instantiate(dentry, inode);
270 unlock_new_inode(inode); 273 unlock_new_inode(inode);
271
272 f2fs_balance_fs(sbi);
273
274 return err; 274 return err;
275out: 275out:
276 clear_nlink(inode); 276 clear_nlink(inode);
@@ -286,6 +286,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
286 struct inode *inode; 286 struct inode *inode;
287 int err; 287 int err;
288 288
289 f2fs_balance_fs(sbi);
290
289 inode = f2fs_new_inode(dir, S_IFDIR | mode); 291 inode = f2fs_new_inode(dir, S_IFDIR | mode);
290 if (IS_ERR(inode)) 292 if (IS_ERR(inode))
291 return PTR_ERR(inode); 293 return PTR_ERR(inode);
@@ -305,7 +307,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
305 d_instantiate(dentry, inode); 307 d_instantiate(dentry, inode);
306 unlock_new_inode(inode); 308 unlock_new_inode(inode);
307 309
308 f2fs_balance_fs(sbi);
309 return 0; 310 return 0;
310 311
311out_fail: 312out_fail:
@@ -336,6 +337,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
336 if (!new_valid_dev(rdev)) 337 if (!new_valid_dev(rdev))
337 return -EINVAL; 338 return -EINVAL;
338 339
340 f2fs_balance_fs(sbi);
341
339 inode = f2fs_new_inode(dir, mode); 342 inode = f2fs_new_inode(dir, mode);
340 if (IS_ERR(inode)) 343 if (IS_ERR(inode))
341 return PTR_ERR(inode); 344 return PTR_ERR(inode);
@@ -350,9 +353,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
350 alloc_nid_done(sbi, inode->i_ino); 353 alloc_nid_done(sbi, inode->i_ino);
351 d_instantiate(dentry, inode); 354 d_instantiate(dentry, inode);
352 unlock_new_inode(inode); 355 unlock_new_inode(inode);
353
354 f2fs_balance_fs(sbi);
355
356 return 0; 356 return 0;
357out: 357out:
358 clear_nlink(inode); 358 clear_nlink(inode);
@@ -376,6 +376,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
376 struct f2fs_dir_entry *new_entry; 376 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT; 377 int err = -ENOENT;
378 378
379 f2fs_balance_fs(sbi);
380
379 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 381 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
380 if (!old_entry) 382 if (!old_entry)
381 goto out; 383 goto out;
@@ -441,8 +443,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
441 } 443 }
442 444
443 mutex_unlock_op(sbi, RENAME); 445 mutex_unlock_op(sbi, RENAME);
444
445 f2fs_balance_fs(sbi);
446 return 0; 446 return 0;
447 447
448out_dir: 448out_dir:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 19870361497e..e275218904ed 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -104,7 +104,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
104 f2fs_put_page(page, 1); 104 f2fs_put_page(page, 1);
105 continue; 105 continue;
106 } 106 }
107 page_cache_release(page); 107 f2fs_put_page(page, 0);
108 } 108 }
109} 109}
110 110
@@ -484,12 +484,14 @@ static void truncate_node(struct dnode_of_data *dn)
484 struct node_info ni; 484 struct node_info ni;
485 485
486 get_node_info(sbi, dn->nid, &ni); 486 get_node_info(sbi, dn->nid, &ni);
487 if (dn->inode->i_blocks == 0) {
488 BUG_ON(ni.blk_addr != NULL_ADDR);
489 goto invalidate;
490 }
487 BUG_ON(ni.blk_addr == NULL_ADDR); 491 BUG_ON(ni.blk_addr == NULL_ADDR);
488 492
489 if (ni.blk_addr != NULL_ADDR)
490 invalidate_blocks(sbi, ni.blk_addr);
491
492 /* Deallocate node address */ 493 /* Deallocate node address */
494 invalidate_blocks(sbi, ni.blk_addr);
493 dec_valid_node_count(sbi, dn->inode, 1); 495 dec_valid_node_count(sbi, dn->inode, 1);
494 set_node_addr(sbi, &ni, NULL_ADDR); 496 set_node_addr(sbi, &ni, NULL_ADDR);
495 497
@@ -499,7 +501,7 @@ static void truncate_node(struct dnode_of_data *dn)
499 } else { 501 } else {
500 sync_inode_page(dn); 502 sync_inode_page(dn);
501 } 503 }
502 504invalidate:
503 clear_node_page_dirty(dn->node_page); 505 clear_node_page_dirty(dn->node_page);
504 F2FS_SET_SB_DIRT(sbi); 506 F2FS_SET_SB_DIRT(sbi);
505 507
@@ -658,7 +660,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
658 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 660 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
659 int err = 0, cont = 1; 661 int err = 0, cont = 1;
660 int level, offset[4], noffset[4]; 662 int level, offset[4], noffset[4];
661 unsigned int nofs; 663 unsigned int nofs = 0;
662 struct f2fs_node *rn; 664 struct f2fs_node *rn;
663 struct dnode_of_data dn; 665 struct dnode_of_data dn;
664 struct page *page; 666 struct page *page;
@@ -768,25 +770,17 @@ int remove_inode_page(struct inode *inode)
768 dn.inode_page_locked = 1; 770 dn.inode_page_locked = 1;
769 truncate_node(&dn); 771 truncate_node(&dn);
770 } 772 }
771 if (inode->i_blocks == 1) {
772 /* inernally call f2fs_put_page() */
773 set_new_dnode(&dn, inode, page, page, ino);
774 truncate_node(&dn);
775 } else if (inode->i_blocks == 0) {
776 struct node_info ni;
777 get_node_info(sbi, inode->i_ino, &ni);
778 773
779 /* called after f2fs_new_inode() is failed */ 774 /* i_blocks may be 0 if f2fs_new_inode() failed */
780 BUG_ON(ni.blk_addr != NULL_ADDR); 775 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
781 f2fs_put_page(page, 1); 776 set_new_dnode(&dn, inode, page, page, ino);
782 } else { 777 truncate_node(&dn);
783 BUG(); 778
784 }
785 mutex_unlock_op(sbi, NODE_TRUNC); 779 mutex_unlock_op(sbi, NODE_TRUNC);
786 return 0; 780 return 0;
787} 781}
788 782
789int new_inode_page(struct inode *inode, struct dentry *dentry) 783int new_inode_page(struct inode *inode, const struct qstr *name)
790{ 784{
791 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 785 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
792 struct page *page; 786 struct page *page;
@@ -796,7 +790,7 @@ int new_inode_page(struct inode *inode, struct dentry *dentry)
796 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 790 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
797 mutex_lock_op(sbi, NODE_NEW); 791 mutex_lock_op(sbi, NODE_NEW);
798 page = new_node_page(&dn, 0); 792 page = new_node_page(&dn, 0);
799 init_dent_inode(dentry, page); 793 init_dent_inode(name, page);
800 mutex_unlock_op(sbi, NODE_NEW); 794 mutex_unlock_op(sbi, NODE_NEW);
801 if (IS_ERR(page)) 795 if (IS_ERR(page))
802 return PTR_ERR(page); 796 return PTR_ERR(page);
@@ -834,17 +828,18 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
834 goto fail; 828 goto fail;
835 } 829 }
836 set_node_addr(sbi, &new_ni, NEW_ADDR); 830 set_node_addr(sbi, &new_ni, NEW_ADDR);
831 set_cold_node(dn->inode, page);
837 832
838 dn->node_page = page; 833 dn->node_page = page;
839 sync_inode_page(dn); 834 sync_inode_page(dn);
840 set_page_dirty(page); 835 set_page_dirty(page);
841 set_cold_node(dn->inode, page);
842 if (ofs == 0) 836 if (ofs == 0)
843 inc_valid_inode_count(sbi); 837 inc_valid_inode_count(sbi);
844 838
845 return page; 839 return page;
846 840
847fail: 841fail:
842 clear_node_page_dirty(page);
848 f2fs_put_page(page, 1); 843 f2fs_put_page(page, 1);
849 return ERR_PTR(err); 844 return ERR_PTR(err);
850} 845}
@@ -879,15 +874,11 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
879 return; 874 return;
880 875
881 if (read_node_page(apage, READA)) 876 if (read_node_page(apage, READA))
882 goto unlock_out; 877 unlock_page(apage);
883
884 page_cache_release(apage);
885 return;
886 878
887unlock_out:
888 unlock_page(apage);
889release_out: 879release_out:
890 page_cache_release(apage); 880 f2fs_put_page(apage, 0);
881 return;
891} 882}
892 883
893struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 884struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@@ -1093,7 +1084,6 @@ static int f2fs_write_node_page(struct page *page,
1093{ 1084{
1094 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1085 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1095 nid_t nid; 1086 nid_t nid;
1096 unsigned int nofs;
1097 block_t new_addr; 1087 block_t new_addr;
1098 struct node_info ni; 1088 struct node_info ni;
1099 1089
@@ -1110,7 +1100,6 @@ static int f2fs_write_node_page(struct page *page,
1110 1100
1111 /* get old block addr of this node page */ 1101 /* get old block addr of this node page */
1112 nid = nid_of_node(page); 1102 nid = nid_of_node(page);
1113 nofs = ofs_of_node(page);
1114 BUG_ON(page->index != nid); 1103 BUG_ON(page->index != nid);
1115 1104
1116 get_node_info(sbi, nid, &ni); 1105 get_node_info(sbi, nid, &ni);
@@ -1131,6 +1120,12 @@ static int f2fs_write_node_page(struct page *page,
1131 return 0; 1120 return 0;
1132} 1121}
1133 1122
1123/*
1124 * It is very important to gather dirty pages and write them at once, so that
1125 * we can submit a big bio without interfering with other data writes.
1126 * By default, 512 pages (2MB), i.e. one segment, is a reasonable batch size.
1127 */
1128#define COLLECT_DIRTY_NODES 512
1134static int f2fs_write_node_pages(struct address_space *mapping, 1129static int f2fs_write_node_pages(struct address_space *mapping,
1135 struct writeback_control *wbc) 1130 struct writeback_control *wbc)
1136{ 1131{
@@ -1138,17 +1133,16 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1138 struct block_device *bdev = sbi->sb->s_bdev; 1133 struct block_device *bdev = sbi->sb->s_bdev;
1139 long nr_to_write = wbc->nr_to_write; 1134 long nr_to_write = wbc->nr_to_write;
1140 1135
1141 if (wbc->for_kupdate) 1136 /* First check balancing cached NAT entries */
1142 return 0;
1143
1144 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1145 return 0;
1146
1147 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1137 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1148 write_checkpoint(sbi, false, false); 1138 write_checkpoint(sbi, false);
1149 return 0; 1139 return 0;
1150 } 1140 }
1151 1141
1142 /* collect a number of dirty node pages and write them out together */
1143 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
1144 return 0;
1145
1152 /* if mounting is failed, skip writing node pages */ 1146 /* if mounting is failed, skip writing node pages */
1153 wbc->nr_to_write = bio_get_nr_vecs(bdev); 1147 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1154 sync_node_pages(sbi, 0, wbc); 1148 sync_node_pages(sbi, 0, wbc);
@@ -1571,7 +1565,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1571 nid_t nid; 1565 nid_t nid;
1572 struct f2fs_nat_entry raw_ne; 1566 struct f2fs_nat_entry raw_ne;
1573 int offset = -1; 1567 int offset = -1;
1574 block_t old_blkaddr, new_blkaddr; 1568 block_t new_blkaddr;
1575 1569
1576 ne = list_entry(cur, struct nat_entry, list); 1570 ne = list_entry(cur, struct nat_entry, list);
1577 nid = nat_get_nid(ne); 1571 nid = nat_get_nid(ne);
@@ -1585,7 +1579,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1585 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1579 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1586 if (offset >= 0) { 1580 if (offset >= 0) {
1587 raw_ne = nat_in_journal(sum, offset); 1581 raw_ne = nat_in_journal(sum, offset);
1588 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1589 goto flush_now; 1582 goto flush_now;
1590 } 1583 }
1591to_nat_page: 1584to_nat_page:
@@ -1607,7 +1600,6 @@ to_nat_page:
1607 1600
1608 BUG_ON(!nat_blk); 1601 BUG_ON(!nat_blk);
1609 raw_ne = nat_blk->entries[nid - start_nid]; 1602 raw_ne = nat_blk->entries[nid - start_nid];
1610 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1611flush_now: 1603flush_now:
1612 new_blkaddr = nat_get_blkaddr(ne); 1604 new_blkaddr = nat_get_blkaddr(ne);
1613 1605
@@ -1741,7 +1733,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1741 kfree(nm_i); 1733 kfree(nm_i);
1742} 1734}
1743 1735
1744int create_node_manager_caches(void) 1736int __init create_node_manager_caches(void)
1745{ 1737{
1746 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1738 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1747 sizeof(struct nat_entry), NULL); 1739 sizeof(struct nat_entry), NULL);
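A quick sanity check on the COLLECT_DIRTY_NODES threshold introduced in f2fs_write_node_pages() above: with 4 KiB blocks and 512 blocks per segment, the usual f2fs geometry, the batch is exactly one segment's worth of node pages.

    #include <stdio.h>

    int main(void)
    {
        const unsigned int block_size = 4096;           /* bytes */
        const unsigned int collect_dirty_nodes = 512;   /* pages per batch */

        printf("batch = %u pages = %u KiB = %u MiB\n",
               collect_dirty_nodes,
               collect_dirty_nodes * block_size / 1024,
               collect_dirty_nodes * block_size / (1024 * 1024));
        return 0;
    }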
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b07e9b6ef376..b235215ac138 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -42,7 +42,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
42{ 42{
43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); 43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i); 44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 struct dentry dent, parent; 45 struct qstr name;
46 struct f2fs_dir_entry *de; 46 struct f2fs_dir_entry *de;
47 struct page *page; 47 struct page *page;
48 struct inode *dir; 48 struct inode *dir;
@@ -57,17 +57,15 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
57 goto out; 57 goto out;
58 } 58 }
59 59
60 parent.d_inode = dir; 60 name.len = le32_to_cpu(raw_inode->i_namelen);
61 dent.d_parent = &parent; 61 name.name = raw_inode->i_name;
62 dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
63 dent.d_name.name = raw_inode->i_name;
64 62
65 de = f2fs_find_entry(dir, &dent.d_name, &page); 63 de = f2fs_find_entry(dir, &name, &page);
66 if (de) { 64 if (de) {
67 kunmap(page); 65 kunmap(page);
68 f2fs_put_page(page, 0); 66 f2fs_put_page(page, 0);
69 } else { 67 } else {
70 f2fs_add_link(&dent, inode); 68 err = __f2fs_add_link(dir, &name, inode);
71 } 69 }
72 iput(dir); 70 iput(dir);
73out: 71out:
@@ -144,14 +142,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
144 goto out; 142 goto out;
145 } 143 }
146 144
147 INIT_LIST_HEAD(&entry->list);
148 list_add_tail(&entry->list, head);
149
150 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 145 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
151 if (IS_ERR(entry->inode)) { 146 if (IS_ERR(entry->inode)) {
152 err = PTR_ERR(entry->inode); 147 err = PTR_ERR(entry->inode);
148 kmem_cache_free(fsync_entry_slab, entry);
153 goto out; 149 goto out;
154 } 150 }
151
152 list_add_tail(&entry->list, head);
155 entry->blkaddr = blkaddr; 153 entry->blkaddr = blkaddr;
156 } 154 }
157 if (IS_INODE(page)) { 155 if (IS_INODE(page)) {
@@ -173,10 +171,9 @@ out:
173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, 171static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
174 struct list_head *head) 172 struct list_head *head)
175{ 173{
176 struct list_head *this; 174 struct fsync_inode_entry *entry, *tmp;
177 struct fsync_inode_entry *entry; 175
178 list_for_each(this, head) { 176 list_for_each_entry_safe(entry, tmp, head, list) {
179 entry = list_entry(this, struct fsync_inode_entry, list);
180 iput(entry->inode); 177 iput(entry->inode);
181 list_del(&entry->list); 178 list_del(&entry->list);
182 kmem_cache_free(fsync_entry_slab, entry); 179 kmem_cache_free(fsync_entry_slab, entry);
@@ -227,7 +224,10 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
227 f2fs_put_page(node_page, 1); 224 f2fs_put_page(node_page, 1);
228 225
229 /* Deallocate previous index in the node page */ 226 /* Deallocate previous index in the node page */
230 inode = f2fs_iget_nowait(sbi->sb, ino); 227 inode = f2fs_iget(sbi->sb, ino);
228 if (IS_ERR(inode))
229 return;
230
231 truncate_hole(inode, bidx, bidx + 1); 231 truncate_hole(inode, bidx, bidx + 1);
232 iput(inode); 232 iput(inode);
233} 233}
@@ -371,5 +371,5 @@ void recover_fsync_data(struct f2fs_sb_info *sbi)
371out: 371out:
372 destroy_fsync_dnodes(sbi, &inode_list); 372 destroy_fsync_dnodes(sbi, &inode_list);
373 kmem_cache_destroy(fsync_entry_slab); 373 kmem_cache_destroy(fsync_entry_slab);
374 write_checkpoint(sbi, false, false); 374 write_checkpoint(sbi, false);
375} 375}
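destroy_fsync_dnodes() above now uses list_for_each_entry_safe(), which remembers the next element before the body frees the current one, and find_fsync_dnodes() only adds an entry to the list once f2fs_iget() has succeeded. A userspace analogue of the safe-iteration part with a hand-rolled list (the names are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>

    struct entry {
        int ino;
        struct entry *next;
    };

    static void destroy_all(struct entry **head)
    {
        struct entry *cur = *head, *tmp;

        /* "safe" iteration: grab ->next before freeing the current node */
        while (cur) {
            tmp = cur->next;
            printf("iput + free entry for ino %d\n", cur->ino);
            free(cur);
            cur = tmp;
        }
        *head = NULL;
    }

    int main(void)
    {
        struct entry *head = NULL;

        for (int i = 3; i >= 1; i--) {
            struct entry *e = malloc(sizeof(*e));

            if (!e)
                break;
            e->ino = i;
            e->next = head;
            head = e;
        }
        destroy_all(&head);
        return 0;
    }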
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1b26e4ea1016..777f17e496e6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -12,57 +12,26 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h>
15#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "segment.h" 19#include "segment.h"
19#include "node.h" 20#include "node.h"
20 21
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
38
39/* 22/*
40 * This function balances dirty node and dentry pages. 23 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection. 24 * In addition, it controls garbage collection.
42 */ 25 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi) 26void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{ 27{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /* 28 /*
 55 * We should do checkpoint when there are so many dirty node pages 29 * We should do GC, or end up with a checkpoint, if there are too many
 56 * with enough free segments. After then, we should do GC. 30 * dirty dir/node pages without enough free segments.
57 */ 31 */
58 if (need_to_flush(sbi)) { 32 if (has_not_enough_free_secs(sbi, 0)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex); 33 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1); 34 f2fs_gc(sbi);
66 } 35 }
67} 36}
68 37
@@ -339,7 +308,7 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
339 * If there is not enough reserved sections, 308 * If there is not enough reserved sections,
340 * we should not reuse prefree segments. 309 * we should not reuse prefree segments.
341 */ 310 */
342 if (has_not_enough_free_secs(sbi)) 311 if (has_not_enough_free_secs(sbi, 0))
343 return NULL_SEGNO; 312 return NULL_SEGNO;
344 313
345 /* 314 /*
@@ -567,6 +536,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
567 } 536 }
568} 537}
569 538
539static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
540{
541 struct curseg_info *curseg = CURSEG_I(sbi, type);
542 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
543
544 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
545 return v_ops->get_victim(sbi,
546 &(curseg)->next_segno, BG_GC, type, SSR);
547
548 /* For data segments, let's do SSR more intensively */
549 for (; type >= CURSEG_HOT_DATA; type--)
550 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
551 BG_GC, type, SSR))
552 return 1;
553 return 0;
554}
555
570/* 556/*
571 * flush out current segment and replace it with new segment 557 * flush out current segment and replace it with new segment
572 * This function should be returned with success, otherwise BUG 558 * This function should be returned with success, otherwise BUG
@@ -631,7 +617,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
631 if (page->mapping) 617 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags); 618 set_bit(AS_EIO, &page->mapping->flags);
633 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); 619 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
634 set_page_dirty(page); 620 p->sbi->sb->s_flags |= MS_RDONLY;
635 } 621 }
636 end_page_writeback(page); 622 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK); 623 dec_page_count(p->sbi, F2FS_WRITEBACK);
@@ -791,11 +777,10 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
791 return __get_segment_type_2(page, p_type); 777 return __get_segment_type_2(page, p_type);
792 case 4: 778 case 4:
793 return __get_segment_type_4(page, p_type); 779 return __get_segment_type_4(page, p_type);
794 case 6:
795 return __get_segment_type_6(page, p_type);
796 default:
797 BUG();
798 } 780 }
781 /* NR_CURSEG_TYPE(6) logs by default */
782 BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
783 return __get_segment_type_6(page, p_type);
799} 784}
800 785
801static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 786static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -848,15 +833,10 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
848 mutex_unlock(&curseg->curseg_mutex); 833 mutex_unlock(&curseg->curseg_mutex);
849} 834}
850 835
851int write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 836void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
852 struct writeback_control *wbc)
853{ 837{
854 if (wbc->for_reclaim)
855 return AOP_WRITEPAGE_ACTIVATE;
856
857 set_page_writeback(page); 838 set_page_writeback(page);
858 submit_write_page(sbi, page, page->index, META); 839 submit_write_page(sbi, page, page->index, META);
859 return 0;
860} 840}
861 841
862void write_node_page(struct f2fs_sb_info *sbi, struct page *page, 842void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1608,7 +1588,6 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1608 1588
1609 for (i = 0; i < NR_DIRTY_TYPE; i++) { 1589 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1610 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 1590 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1611 dirty_i->nr_dirty[i] = 0;
1612 if (!dirty_i->dirty_segmap[i]) 1591 if (!dirty_i->dirty_segmap[i])
1613 return -ENOMEM; 1592 return -ENOMEM;
1614 } 1593 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0948405af6f5..552dadbb2327 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -450,16 +450,16 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
450 return (free_sections(sbi) < overprovision_sections(sbi)); 450 return (free_sections(sbi) < overprovision_sections(sbi));
451} 451}
452 452
453static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) 453static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
454{ 454{
455 struct curseg_info *curseg = CURSEG_I(sbi, type); 455 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
456 return DIRTY_I(sbi)->v_ops->get_victim(sbi, 456 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
457 &(curseg)->next_segno, BG_GC, type, SSR);
458}
459 457
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) 458 if (sbi->por_doing)
461{ 459 return false;
462 return free_sections(sbi) <= reserved_sections(sbi); 460
461 return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
462 reserved_sections(sbi)));
463} 463}
464 464
465static inline int utilization(struct f2fs_sb_info *sbi) 465static inline int utilization(struct f2fs_sb_info *sbi)
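For illustration only (not part of the patch): the reworked has_not_enough_free_secs() above folds the old need_to_flush() estimate into a single check — cleaning is needed when the free sections, plus any about to be freed, cannot cover the dirty node sections, twice the dirty dentry sections, and the reserved sections. A stand-alone sketch of that arithmetic, with made-up sample values, might look like:

    #include <stdbool.h>
    #include <stdio.h>

    /* illustrative model only; names and numbers are hypothetical */
    static bool not_enough_free_secs(int free_secs, int freed,
                                     int node_secs, int dent_secs,
                                     int reserved_secs)
    {
            /* same inequality as the patched helper */
            return (free_secs + freed) <=
                   (node_secs + 2 * dent_secs + reserved_secs);
    }

    int main(void)
    {
            /* 10 free sections, 0 freed, 3 dirty-node, 2 dirty-dentry, 5 reserved */
            printf("needs cleaning: %d\n",
                   not_enough_free_secs(10, 0, 3, 2, 5));
            return 0;
    }

With these sample numbers 10 <= 3 + 4 + 5 holds, so f2fs_balance_fs() would take gc_mutex and run f2fs_gc().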
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13867322cf5a..8c117649a035 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -53,6 +53,18 @@ static match_table_t f2fs_tokens = {
53 {Opt_err, NULL}, 53 {Opt_err, NULL},
54}; 54};
55 55
56void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
57{
58 struct va_format vaf;
59 va_list args;
60
61 va_start(args, fmt);
62 vaf.fmt = fmt;
63 vaf.va = &args;
64 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
65 va_end(args);
66}
67
56static void init_once(void *foo) 68static void init_once(void *foo)
57{ 69{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; 70 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -100,7 +112,7 @@ static void f2fs_put_super(struct super_block *sb)
100 f2fs_destroy_stats(sbi); 112 f2fs_destroy_stats(sbi);
101 stop_gc_thread(sbi); 113 stop_gc_thread(sbi);
102 114
103 write_checkpoint(sbi, false, true); 115 write_checkpoint(sbi, true);
104 116
105 iput(sbi->node_inode); 117 iput(sbi->node_inode);
106 iput(sbi->meta_inode); 118 iput(sbi->meta_inode);
@@ -119,15 +131,32 @@ static void f2fs_put_super(struct super_block *sb)
119int f2fs_sync_fs(struct super_block *sb, int sync) 131int f2fs_sync_fs(struct super_block *sb, int sync)
120{ 132{
121 struct f2fs_sb_info *sbi = F2FS_SB(sb); 133 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 int ret = 0;
123 134
124 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) 135 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
125 return 0; 136 return 0;
126 137
127 if (sync) 138 if (sync)
128 write_checkpoint(sbi, false, false); 139 write_checkpoint(sbi, false);
140 else
141 f2fs_balance_fs(sbi);
129 142
130 return ret; 143 return 0;
144}
145
146static int f2fs_freeze(struct super_block *sb)
147{
148 int err;
149
150 if (sb->s_flags & MS_RDONLY)
151 return 0;
152
153 err = f2fs_sync_fs(sb, 1);
154 return err;
155}
156
157static int f2fs_unfreeze(struct super_block *sb)
158{
159 return 0;
131} 160}
132 161
133static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) 162static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -148,8 +177,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
148 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; 177 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
149 buf->f_bavail = user_block_count - valid_user_blocks(sbi); 178 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
150 179
151 buf->f_files = valid_inode_count(sbi); 180 buf->f_files = sbi->total_node_count;
152 buf->f_ffree = sbi->total_node_count - valid_node_count(sbi); 181 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
153 182
154 buf->f_namelen = F2FS_MAX_NAME_LEN; 183 buf->f_namelen = F2FS_MAX_NAME_LEN;
155 buf->f_fsid.val[0] = (u32)id; 184 buf->f_fsid.val[0] = (u32)id;
@@ -185,7 +214,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
185 seq_puts(seq, ",noacl"); 214 seq_puts(seq, ",noacl");
186#endif 215#endif
187 if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) 216 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
188 seq_puts(seq, ",disable_ext_indentify"); 217 seq_puts(seq, ",disable_ext_identify");
189 218
190 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 219 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
191 220
@@ -200,6 +229,8 @@ static struct super_operations f2fs_sops = {
200 .evict_inode = f2fs_evict_inode, 229 .evict_inode = f2fs_evict_inode,
201 .put_super = f2fs_put_super, 230 .put_super = f2fs_put_super,
202 .sync_fs = f2fs_sync_fs, 231 .sync_fs = f2fs_sync_fs,
232 .freeze_fs = f2fs_freeze,
233 .unfreeze_fs = f2fs_unfreeze,
203 .statfs = f2fs_statfs, 234 .statfs = f2fs_statfs,
204}; 235};
205 236
@@ -248,7 +279,8 @@ static const struct export_operations f2fs_export_ops = {
248 .get_parent = f2fs_get_parent, 279 .get_parent = f2fs_get_parent,
249}; 280};
250 281
251static int parse_options(struct f2fs_sb_info *sbi, char *options) 282static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
283 char *options)
252{ 284{
253 substring_t args[MAX_OPT_ARGS]; 285 substring_t args[MAX_OPT_ARGS];
254 char *p; 286 char *p;
@@ -287,7 +319,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
287 break; 319 break;
288#else 320#else
289 case Opt_nouser_xattr: 321 case Opt_nouser_xattr:
290 pr_info("nouser_xattr options not supported\n"); 322 f2fs_msg(sb, KERN_INFO,
323 "nouser_xattr options not supported");
291 break; 324 break;
292#endif 325#endif
293#ifdef CONFIG_F2FS_FS_POSIX_ACL 326#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -296,13 +329,13 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
296 break; 329 break;
297#else 330#else
298 case Opt_noacl: 331 case Opt_noacl:
299 pr_info("noacl options not supported\n"); 332 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
300 break; 333 break;
301#endif 334#endif
302 case Opt_active_logs: 335 case Opt_active_logs:
303 if (args->from && match_int(args, &arg)) 336 if (args->from && match_int(args, &arg))
304 return -EINVAL; 337 return -EINVAL;
305 if (arg != 2 && arg != 4 && arg != 6) 338 if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
306 return -EINVAL; 339 return -EINVAL;
307 sbi->active_logs = arg; 340 sbi->active_logs = arg;
308 break; 341 break;
@@ -310,8 +343,9 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
310 set_opt(sbi, DISABLE_EXT_IDENTIFY); 343 set_opt(sbi, DISABLE_EXT_IDENTIFY);
311 break; 344 break;
312 default: 345 default:
313 pr_err("Unrecognized mount option \"%s\" or missing value\n", 346 f2fs_msg(sb, KERN_ERR,
314 p); 347 "Unrecognized mount option \"%s\" or missing value",
348 p);
315 return -EINVAL; 349 return -EINVAL;
316 } 350 }
317 } 351 }
@@ -338,30 +372,53 @@ static loff_t max_file_size(unsigned bits)
338 return result; 372 return result;
339} 373}
340 374
341static int sanity_check_raw_super(struct f2fs_super_block *raw_super) 375static int sanity_check_raw_super(struct super_block *sb,
376 struct f2fs_super_block *raw_super)
342{ 377{
343 unsigned int blocksize; 378 unsigned int blocksize;
344 379
345 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) 380 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
381 f2fs_msg(sb, KERN_INFO,
382 "Magic Mismatch, valid(0x%x) - read(0x%x)",
383 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
346 return 1; 384 return 1;
385 }
386
387 /* Currently, support only 4KB page cache size */
388 if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
389 f2fs_msg(sb, KERN_INFO,
390 "Invalid page_cache_size (%lu), supports only 4KB\n",
391 PAGE_CACHE_SIZE);
392 return 1;
393 }
347 394
348 /* Currently, support only 4KB block size */ 395 /* Currently, support only 4KB block size */
349 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); 396 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
350 if (blocksize != PAGE_CACHE_SIZE) 397 if (blocksize != F2FS_BLKSIZE) {
398 f2fs_msg(sb, KERN_INFO,
399 "Invalid blocksize (%u), supports only 4KB\n",
400 blocksize);
351 return 1; 401 return 1;
402 }
403
352 if (le32_to_cpu(raw_super->log_sectorsize) != 404 if (le32_to_cpu(raw_super->log_sectorsize) !=
353 F2FS_LOG_SECTOR_SIZE) 405 F2FS_LOG_SECTOR_SIZE) {
406 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
354 return 1; 407 return 1;
408 }
355 if (le32_to_cpu(raw_super->log_sectors_per_block) != 409 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
356 F2FS_LOG_SECTORS_PER_BLOCK) 410 F2FS_LOG_SECTORS_PER_BLOCK) {
411 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
357 return 1; 412 return 1;
413 }
358 return 0; 414 return 0;
359} 415}
360 416
361static int sanity_check_ckpt(struct f2fs_super_block *raw_super, 417static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
362 struct f2fs_checkpoint *ckpt)
363{ 418{
364 unsigned int total, fsmeta; 419 unsigned int total, fsmeta;
420 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
421 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
365 422
366 total = le32_to_cpu(raw_super->segment_count); 423 total = le32_to_cpu(raw_super->segment_count);
367 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); 424 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -372,6 +429,11 @@ static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
372 429
373 if (fsmeta >= total) 430 if (fsmeta >= total)
374 return 1; 431 return 1;
432
433 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
434 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
435 return 1;
436 }
375 return 0; 437 return 0;
376} 438}
377 439
@@ -400,6 +462,32 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
400 atomic_set(&sbi->nr_pages[i], 0); 462 atomic_set(&sbi->nr_pages[i], 0);
401} 463}
402 464
465static int validate_superblock(struct super_block *sb,
466 struct f2fs_super_block **raw_super,
467 struct buffer_head **raw_super_buf, sector_t block)
468{
469 const char *super = (block == 0 ? "first" : "second");
470
471 /* read f2fs raw super block */
472 *raw_super_buf = sb_bread(sb, block);
473 if (!*raw_super_buf) {
474 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
475 super);
476 return 1;
477 }
478
479 *raw_super = (struct f2fs_super_block *)
480 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
481
482 /* sanity checking of raw super */
483 if (!sanity_check_raw_super(sb, *raw_super))
484 return 0;
485
486 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
487 "in %s superblock", super);
488 return 1;
489}
490
403static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 491static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
404{ 492{
405 struct f2fs_sb_info *sbi; 493 struct f2fs_sb_info *sbi;
@@ -414,19 +502,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
414 if (!sbi) 502 if (!sbi)
415 return -ENOMEM; 503 return -ENOMEM;
416 504
417 /* set a temporary block size */ 505 /* set a block size */
418 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) 506 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
419 goto free_sbi; 507 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
420
421 /* read f2fs raw super block */
422 raw_super_buf = sb_bread(sb, 0);
423 if (!raw_super_buf) {
424 err = -EIO;
425 goto free_sbi; 508 goto free_sbi;
426 } 509 }
427 raw_super = (struct f2fs_super_block *)
428 ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
429 510
511 if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) {
512 brelse(raw_super_buf);
513 if (validate_superblock(sb, &raw_super, &raw_super_buf, 1))
514 goto free_sb_buf;
515 }
430 /* init some FS parameters */ 516 /* init some FS parameters */
431 sbi->active_logs = NR_CURSEG_TYPE; 517 sbi->active_logs = NR_CURSEG_TYPE;
432 518
@@ -439,11 +525,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
439 set_opt(sbi, POSIX_ACL); 525 set_opt(sbi, POSIX_ACL);
440#endif 526#endif
441 /* parse mount options */ 527 /* parse mount options */
442 if (parse_options(sbi, (char *)data)) 528 if (parse_options(sb, sbi, (char *)data))
443 goto free_sb_buf;
444
445 /* sanity checking of raw super */
446 if (sanity_check_raw_super(raw_super))
447 goto free_sb_buf; 529 goto free_sb_buf;
448 530
449 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 531 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
@@ -478,18 +560,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
478 /* get an inode for meta space */ 560 /* get an inode for meta space */
479 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 561 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
480 if (IS_ERR(sbi->meta_inode)) { 562 if (IS_ERR(sbi->meta_inode)) {
563 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
481 err = PTR_ERR(sbi->meta_inode); 564 err = PTR_ERR(sbi->meta_inode);
482 goto free_sb_buf; 565 goto free_sb_buf;
483 } 566 }
484 567
485 err = get_valid_checkpoint(sbi); 568 err = get_valid_checkpoint(sbi);
486 if (err) 569 if (err) {
570 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
487 goto free_meta_inode; 571 goto free_meta_inode;
572 }
488 573
489 /* sanity checking of checkpoint */ 574 /* sanity checking of checkpoint */
490 err = -EINVAL; 575 err = -EINVAL;
491 if (sanity_check_ckpt(raw_super, sbi->ckpt)) 576 if (sanity_check_ckpt(sbi)) {
577 f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
492 goto free_cp; 578 goto free_cp;
579 }
493 580
494 sbi->total_valid_node_count = 581 sbi->total_valid_node_count =
495 le32_to_cpu(sbi->ckpt->valid_node_count); 582 le32_to_cpu(sbi->ckpt->valid_node_count);
@@ -503,38 +590,41 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
503 INIT_LIST_HEAD(&sbi->dir_inode_list); 590 INIT_LIST_HEAD(&sbi->dir_inode_list);
504 spin_lock_init(&sbi->dir_inode_lock); 591 spin_lock_init(&sbi->dir_inode_lock);
505 592
506 /* init super block */
507 if (!sb_set_blocksize(sb, sbi->blocksize))
508 goto free_cp;
509
510 init_orphan_info(sbi); 593 init_orphan_info(sbi);
511 594
512 /* setup f2fs internal modules */ 595 /* setup f2fs internal modules */
513 err = build_segment_manager(sbi); 596 err = build_segment_manager(sbi);
514 if (err) 597 if (err) {
598 f2fs_msg(sb, KERN_ERR,
599 "Failed to initialize F2FS segment manager");
515 goto free_sm; 600 goto free_sm;
601 }
516 err = build_node_manager(sbi); 602 err = build_node_manager(sbi);
517 if (err) 603 if (err) {
604 f2fs_msg(sb, KERN_ERR,
605 "Failed to initialize F2FS node manager");
518 goto free_nm; 606 goto free_nm;
607 }
519 608
520 build_gc_manager(sbi); 609 build_gc_manager(sbi);
521 610
522 /* get an inode for node space */ 611 /* get an inode for node space */
523 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); 612 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
524 if (IS_ERR(sbi->node_inode)) { 613 if (IS_ERR(sbi->node_inode)) {
614 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
525 err = PTR_ERR(sbi->node_inode); 615 err = PTR_ERR(sbi->node_inode);
526 goto free_nm; 616 goto free_nm;
527 } 617 }
528 618
 529 /* if there are any orphan nodes, free them */ 619 /* if there are any orphan nodes, free them */
530 err = -EINVAL; 620 err = -EINVAL;
531 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 621 if (recover_orphan_inodes(sbi))
532 recover_orphan_inodes(sbi))
533 goto free_node_inode; 622 goto free_node_inode;
534 623
535 /* read root inode and dentry */ 624 /* read root inode and dentry */
536 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 625 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
537 if (IS_ERR(root)) { 626 if (IS_ERR(root)) {
627 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
538 err = PTR_ERR(root); 628 err = PTR_ERR(root);
539 goto free_node_inode; 629 goto free_node_inode;
540 } 630 }
@@ -548,8 +638,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
548 } 638 }
549 639
550 /* recover fsynced data */ 640 /* recover fsynced data */
551 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 641 if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
552 !test_opt(sbi, DISABLE_ROLL_FORWARD))
553 recover_fsync_data(sbi); 642 recover_fsync_data(sbi);
554 643
555 /* After POR, we can run background GC thread */ 644 /* After POR, we can run background GC thread */
@@ -599,7 +688,7 @@ static struct file_system_type f2fs_fs_type = {
599 .fs_flags = FS_REQUIRES_DEV, 688 .fs_flags = FS_REQUIRES_DEV,
600}; 689};
601 690
602static int init_inodecache(void) 691static int __init init_inodecache(void)
603{ 692{
604 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 693 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
605 sizeof(struct f2fs_inode_info), NULL); 694 sizeof(struct f2fs_inode_info), NULL);
@@ -634,14 +723,17 @@ static int __init init_f2fs_fs(void)
634 err = create_checkpoint_caches(); 723 err = create_checkpoint_caches();
635 if (err) 724 if (err)
636 goto fail; 725 goto fail;
637 return register_filesystem(&f2fs_fs_type); 726 err = register_filesystem(&f2fs_fs_type);
727 if (err)
728 goto fail;
729 f2fs_create_root_stats();
638fail: 730fail:
639 return err; 731 return err;
640} 732}
641 733
642static void __exit exit_f2fs_fs(void) 734static void __exit exit_f2fs_fs(void)
643{ 735{
644 destroy_root_stats(); 736 f2fs_destroy_root_stats();
645 unregister_filesystem(&f2fs_fs_type); 737 unregister_filesystem(&f2fs_fs_type);
646 destroy_checkpoint_caches(); 738 destroy_checkpoint_caches();
647 destroy_gc_caches(); 739 destroy_gc_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7d52e8dc0c59..8038c0496504 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -208,7 +208,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
208 struct page *page; 208 struct page *page;
209 void *base_addr; 209 void *base_addr;
210 int error = 0, found = 0; 210 int error = 0, found = 0;
211 int value_len, name_len; 211 size_t value_len, name_len;
212 212
213 if (name == NULL) 213 if (name == NULL)
214 return -EINVAL; 214 return -EINVAL;
@@ -304,7 +304,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
304 struct f2fs_xattr_entry *here, *last; 304 struct f2fs_xattr_entry *here, *last;
305 struct page *page; 305 struct page *page;
306 void *base_addr; 306 void *base_addr;
307 int error, found, free, name_len, newsize; 307 int error, found, free, newsize;
308 size_t name_len;
308 char *pval; 309 char *pval;
309 310
310 if (name == NULL) 311 if (name == NULL)
@@ -317,6 +318,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
317 if (name_len > 255 || value_len > MAX_VALUE_LEN) 318 if (name_len > 255 || value_len > MAX_VALUE_LEN)
318 return -ERANGE; 319 return -ERANGE;
319 320
321 f2fs_balance_fs(sbi);
322
320 mutex_lock_op(sbi, NODE_NEW); 323 mutex_lock_op(sbi, NODE_NEW);
321 if (!fi->i_xattr_nid) { 324 if (!fi->i_xattr_nid) {
322 /* Allocate new attribute block */ 325 /* Allocate new attribute block */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 58bf744dbf39..165012ef363a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -698,7 +698,7 @@ out:
698 698
699static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) 699static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
700{ 700{
701 struct inode *inode = filp->f_path.dentry->d_inode; 701 struct inode *inode = file_inode(filp);
702 return __fat_readdir(inode, filp, dirent, filldir, 0, 0); 702 return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
703} 703}
704 704
@@ -779,7 +779,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
779static long fat_dir_ioctl(struct file *filp, unsigned int cmd, 779static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
780 unsigned long arg) 780 unsigned long arg)
781{ 781{
782 struct inode *inode = filp->f_path.dentry->d_inode; 782 struct inode *inode = file_inode(filp);
783 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; 783 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
784 int short_only, both; 784 int short_only, both;
785 785
@@ -819,7 +819,7 @@ FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent)
819static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, 819static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
820 unsigned long arg) 820 unsigned long arg)
821{ 821{
822 struct inode *inode = filp->f_path.dentry->d_inode; 822 struct inode *inode = file_inode(filp);
823 struct compat_dirent __user *d1 = compat_ptr(arg); 823 struct compat_dirent __user *d1 = compat_ptr(arg);
824 int short_only, both; 824 int short_only, both;
825 825
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 12701a567752..e9cc3f0d58e2 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -95,6 +95,8 @@ struct msdos_sb_info {
95 95
96 spinlock_t dir_hash_lock; 96 spinlock_t dir_hash_lock;
97 struct hlist_head dir_hashtable[FAT_HASH_SIZE]; 97 struct hlist_head dir_hashtable[FAT_HASH_SIZE];
98
99 unsigned int dirty; /* fs state before mount */
98}; 100};
99 101
100#define FAT_CACHE_VALID 0 /* special case for valid cache */ 102#define FAT_CACHE_VALID 0 /* special case for valid cache */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a62e0ecbe2db..3978f8ca1823 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -32,7 +32,7 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
32 32
33static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) 33static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
34{ 34{
35 struct inode *inode = file->f_path.dentry->d_inode; 35 struct inode *inode = file_inode(file);
36 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 36 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
37 int is_dir = S_ISDIR(inode->i_mode); 37 int is_dir = S_ISDIR(inode->i_mode);
38 u32 attr, oldattr; 38 u32 attr, oldattr;
@@ -116,7 +116,7 @@ out:
116 116
117long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 117long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
118{ 118{
119 struct inode *inode = filp->f_path.dentry->d_inode; 119 struct inode *inode = file_inode(filp);
120 u32 __user *user_attr = (u32 __user *)arg; 120 u32 __user *user_attr = (u32 __user *)arg;
121 121
122 switch (cmd) { 122 switch (cmd) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index f8f491677a4a..acf6e479b443 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
341{ 341{
342 struct msdos_sb_info *sbi = MSDOS_SB(sb); 342 struct msdos_sb_info *sbi = MSDOS_SB(sb);
343 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); 343 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
344 struct hlist_node *_p;
345 struct msdos_inode_info *i; 344 struct msdos_inode_info *i;
346 struct inode *inode = NULL; 345 struct inode *inode = NULL;
347 346
348 spin_lock(&sbi->inode_hash_lock); 347 spin_lock(&sbi->inode_hash_lock);
349 hlist_for_each_entry(i, _p, head, i_fat_hash) { 348 hlist_for_each_entry(i, head, i_fat_hash) {
350 BUG_ON(i->vfs_inode.i_sb != sb); 349 BUG_ON(i->vfs_inode.i_sb != sb);
351 if (i->i_pos != i_pos) 350 if (i->i_pos != i_pos)
352 continue; 351 continue;
@@ -488,10 +487,59 @@ static void fat_evict_inode(struct inode *inode)
488 fat_detach(inode); 487 fat_detach(inode);
489} 488}
490 489
490static void fat_set_state(struct super_block *sb,
491 unsigned int set, unsigned int force)
492{
493 struct buffer_head *bh;
494 struct fat_boot_sector *b;
495 struct msdos_sb_info *sbi = sb->s_fs_info;
496
 497 /* do not change anything if mounted read-only */
498 if ((sb->s_flags & MS_RDONLY) && !force)
499 return;
500
501 /* do not change state if fs was dirty */
502 if (sbi->dirty) {
503 /* warn only on set (mount). */
504 if (set)
505 fat_msg(sb, KERN_WARNING, "Volume was not properly "
506 "unmounted. Some data may be corrupt. "
507 "Please run fsck.");
508 return;
509 }
510
511 bh = sb_bread(sb, 0);
512 if (bh == NULL) {
513 fat_msg(sb, KERN_ERR, "unable to read boot sector "
514 "to mark fs as dirty");
515 return;
516 }
517
518 b = (struct fat_boot_sector *) bh->b_data;
519
520 if (sbi->fat_bits == 32) {
521 if (set)
522 b->fat32.state |= FAT_STATE_DIRTY;
523 else
524 b->fat32.state &= ~FAT_STATE_DIRTY;
525 } else /* fat 16 and 12 */ {
526 if (set)
527 b->fat16.state |= FAT_STATE_DIRTY;
528 else
529 b->fat16.state &= ~FAT_STATE_DIRTY;
530 }
531
532 mark_buffer_dirty(bh);
533 sync_dirty_buffer(bh);
534 brelse(bh);
535}
536
491static void fat_put_super(struct super_block *sb) 537static void fat_put_super(struct super_block *sb)
492{ 538{
493 struct msdos_sb_info *sbi = MSDOS_SB(sb); 539 struct msdos_sb_info *sbi = MSDOS_SB(sb);
494 540
541 fat_set_state(sb, 0, 0);
542
495 iput(sbi->fsinfo_inode); 543 iput(sbi->fsinfo_inode);
496 iput(sbi->fat_inode); 544 iput(sbi->fat_inode);
497 545
@@ -566,8 +614,18 @@ static void __exit fat_destroy_inodecache(void)
566 614
567static int fat_remount(struct super_block *sb, int *flags, char *data) 615static int fat_remount(struct super_block *sb, int *flags, char *data)
568{ 616{
617 int new_rdonly;
569 struct msdos_sb_info *sbi = MSDOS_SB(sb); 618 struct msdos_sb_info *sbi = MSDOS_SB(sb);
570 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); 619 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
620
621 /* make sure we update state on remount. */
622 new_rdonly = *flags & MS_RDONLY;
623 if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
624 if (new_rdonly)
625 fat_set_state(sb, 0, 0);
626 else
627 fat_set_state(sb, 1, 1);
628 }
571 return 0; 629 return 0;
572} 630}
573 631
@@ -1298,17 +1356,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1298 sbi->prev_free = FAT_START_ENT; 1356 sbi->prev_free = FAT_START_ENT;
1299 sb->s_maxbytes = 0xffffffff; 1357 sb->s_maxbytes = 0xffffffff;
1300 1358
1301 if (!sbi->fat_length && b->fat32_length) { 1359 if (!sbi->fat_length && b->fat32.length) {
1302 struct fat_boot_fsinfo *fsinfo; 1360 struct fat_boot_fsinfo *fsinfo;
1303 struct buffer_head *fsinfo_bh; 1361 struct buffer_head *fsinfo_bh;
1304 1362
1305 /* Must be FAT32 */ 1363 /* Must be FAT32 */
1306 sbi->fat_bits = 32; 1364 sbi->fat_bits = 32;
1307 sbi->fat_length = le32_to_cpu(b->fat32_length); 1365 sbi->fat_length = le32_to_cpu(b->fat32.length);
1308 sbi->root_cluster = le32_to_cpu(b->root_cluster); 1366 sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster);
1309 1367
1310 /* MC - if info_sector is 0, don't multiply by 0 */ 1368 /* MC - if info_sector is 0, don't multiply by 0 */
1311 sbi->fsinfo_sector = le16_to_cpu(b->info_sector); 1369 sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector);
1312 if (sbi->fsinfo_sector == 0) 1370 if (sbi->fsinfo_sector == 0)
1313 sbi->fsinfo_sector = 1; 1371 sbi->fsinfo_sector = 1;
1314 1372
@@ -1362,6 +1420,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1362 if (sbi->fat_bits != 32) 1420 if (sbi->fat_bits != 32)
1363 sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; 1421 sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
1364 1422
1423 /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
1424 if (sbi->fat_bits == 32)
1425 sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;
1426 else /* fat 16 or 12 */
1427 sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
1428
1365 /* check that FAT table does not overflow */ 1429 /* check that FAT table does not overflow */
1366 fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; 1430 fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
1367 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1431 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
@@ -1456,6 +1520,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1456 "the device does not support discard"); 1520 "the device does not support discard");
1457 } 1521 }
1458 1522
1523 fat_set_state(sb, 1, 0);
1459 return 0; 1524 return 0;
1460 1525
1461out_invalid: 1526out_invalid:
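Taken together, the fat_set_state() hunks above implement the usual dirty-bit protocol: the state byte in the boot sector (fat32.state, or fat16.state for FAT12/16, tested against FAT_STATE_DIRTY) is set while the volume is mounted read-write and cleared again on a clean unmount or a remount to read-only, so only a crash leaves it set for the next mount to warn about. A rough schematic of the call sequence, reconstructed from the hunks above rather than quoted from the patch, is:

    /* mount (fat_fill_super): remember the on-disk state, then mark dirty */
    sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;   /* fat16.state for FAT12/16 */
    fat_set_state(sb, 1, 0);

    /* clean unmount (fat_put_super) or remount,ro: clear the flag */
    fat_set_state(sb, 0, 0);

    /* remount,rw: force the flag back on even though the sb is still read-only */
    fat_set_state(sb, 1, 1);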
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index ef4b5faba87b..499c10438ca2 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
21{ 21{
22 struct msdos_sb_info *sbi = MSDOS_SB(sb); 22 struct msdos_sb_info *sbi = MSDOS_SB(sb);
23 struct hlist_head *head; 23 struct hlist_head *head;
24 struct hlist_node *_p;
25 struct msdos_inode_info *i; 24 struct msdos_inode_info *i;
26 struct inode *inode = NULL; 25 struct inode *inode = NULL;
27 26
28 head = sbi->dir_hashtable + fat_dir_hash(i_logstart); 27 head = sbi->dir_hashtable + fat_dir_hash(i_logstart);
29 spin_lock(&sbi->dir_hash_lock); 28 spin_lock(&sbi->dir_hash_lock);
30 hlist_for_each_entry(i, _p, head, i_dir_hash) { 29 hlist_for_each_entry(i, head, i_dir_hash) {
31 BUG_ON(i->vfs_inode.i_sb != sb); 30 BUG_ON(i->vfs_inode.i_sb != sb);
32 if (i->i_logstart != i_logstart) 31 if (i->i_logstart != i_logstart)
33 continue; 32 continue;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 71a600a19f06..6599222536eb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -30,7 +30,7 @@
30 30
31static int setfl(int fd, struct file * filp, unsigned long arg) 31static int setfl(int fd, struct file * filp, unsigned long arg)
32{ 32{
33 struct inode * inode = filp->f_path.dentry->d_inode; 33 struct inode * inode = file_inode(filp);
34 int error = 0; 34 int error = 0;
35 35
36 /* 36 /*
diff --git a/fs/file.c b/fs/file.c
index 15cb8618e95d..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -490,7 +490,7 @@ void exit_files(struct task_struct *tsk)
490 } 490 }
491} 491}
492 492
493static void __devinit fdtable_defer_list_init(int cpu) 493static void fdtable_defer_list_init(int cpu)
494{ 494{
495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
496 spin_lock_init(&fddef->lock); 496 spin_lock_init(&fddef->lock);
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/fs/file_table.c b/fs/file_table.c
index de9e9653d611..aa07d3684a2e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -94,8 +94,8 @@ int proc_nr_files(ctl_table *table, int write,
94#endif 94#endif
95 95
96/* Find an unused file structure and return a pointer to it. 96/* Find an unused file structure and return a pointer to it.
 97 * Returns NULL, if there are no more free file structures or 97 * Returns an error pointer if some error happened, e.g. we are over the file
 98 * we run out of memory. 98 * structures limit, run out of memory or the operation is not permitted.
99 * 99 *
100 * Be very careful using this. You are responsible for 100 * Be very careful using this. You are responsible for
101 * getting write access to any mount that you might assign 101 * getting write access to any mount that you might assign
@@ -107,7 +107,8 @@ struct file *get_empty_filp(void)
107{ 107{
108 const struct cred *cred = current_cred(); 108 const struct cred *cred = current_cred();
109 static long old_max; 109 static long old_max;
110 struct file * f; 110 struct file *f;
111 int error;
111 112
112 /* 113 /*
113 * Privileged users can go above max_files 114 * Privileged users can go above max_files
@@ -122,13 +123,16 @@ struct file *get_empty_filp(void)
122 } 123 }
123 124
124 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); 125 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
125 if (f == NULL) 126 if (unlikely(!f))
126 goto fail; 127 return ERR_PTR(-ENOMEM);
127 128
128 percpu_counter_inc(&nr_files); 129 percpu_counter_inc(&nr_files);
129 f->f_cred = get_cred(cred); 130 f->f_cred = get_cred(cred);
130 if (security_file_alloc(f)) 131 error = security_file_alloc(f);
131 goto fail_sec; 132 if (unlikely(error)) {
133 file_free(f);
134 return ERR_PTR(error);
135 }
132 136
133 INIT_LIST_HEAD(&f->f_u.fu_list); 137 INIT_LIST_HEAD(&f->f_u.fu_list);
134 atomic_long_set(&f->f_count, 1); 138 atomic_long_set(&f->f_count, 1);
@@ -144,12 +148,7 @@ over:
144 pr_info("VFS: file-max limit %lu reached\n", get_max_files()); 148 pr_info("VFS: file-max limit %lu reached\n", get_max_files());
145 old_max = get_nr_files(); 149 old_max = get_nr_files();
146 } 150 }
147 goto fail; 151 return ERR_PTR(-ENFILE);
148
149fail_sec:
150 file_free(f);
151fail:
152 return NULL;
153} 152}
154 153
155/** 154/**
@@ -173,8 +172,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
173 struct file *file; 172 struct file *file;
174 173
175 file = get_empty_filp(); 174 file = get_empty_filp();
176 if (!file) 175 if (IS_ERR(file))
177 return NULL; 176 return file;
178 177
179 file->f_path = *path; 178 file->f_path = *path;
180 file->f_mapping = path->dentry->d_inode->i_mapping; 179 file->f_mapping = path->dentry->d_inode->i_mapping;
@@ -447,7 +446,7 @@ void mark_files_ro(struct super_block *sb)
447 446
448 lg_global_lock(&files_lglock); 447 lg_global_lock(&files_lglock);
449 do_file_list_for_each_entry(sb, f) { 448 do_file_list_for_each_entry(sb, f) {
450 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 449 if (!S_ISREG(file_inode(f)->i_mode))
451 continue; 450 continue;
452 if (!file_count(f)) 451 if (!file_count(f))
453 continue; 452 continue;
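Because get_empty_filp() now encodes its failure reason in an ERR_PTR() instead of returning NULL, a caller is expected to propagate that error rather than assume -ENFILE. A minimal sketch of the caller pattern (surrounding code is hypothetical):

    struct file *f;

    f = get_empty_filp();
    if (IS_ERR(f))
            return PTR_ERR(f);      /* -ENFILE, -ENOMEM or a security error */
    /* ... set up and use f ... */

alloc_file() in the hunk above follows the same pattern, simply returning the ERR_PTR to its own caller.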
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index bd447e88f208..664b07a53870 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -237,7 +237,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
237static int 237static int
238vxfs_readdir(struct file *fp, void *retp, filldir_t filler) 238vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
239{ 239{
240 struct inode *ip = fp->f_path.dentry->d_inode; 240 struct inode *ip = file_inode(fp);
241 struct super_block *sbp = ip->i_sb; 241 struct super_block *sbp = ip->i_sb;
242 u_long bsize = sbp->s_blocksize; 242 u_long bsize = sbp->s_blocksize;
243 u_long page, npages, block, pblocks, nblocks, offset; 243 u_long page, npages, block, pblocks, nblocks, offset;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 310972b72a66..21f46fb3a101 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
318 318
319static int write_inode(struct inode *inode, struct writeback_control *wbc) 319static int write_inode(struct inode *inode, struct writeback_control *wbc)
320{ 320{
321 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 321 int ret;
322 return inode->i_sb->s_op->write_inode(inode, wbc); 322
323 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
324 trace_writeback_write_inode_start(inode, wbc);
325 ret = inode->i_sb->s_op->write_inode(inode, wbc);
326 trace_writeback_write_inode(inode, wbc);
327 return ret;
328 }
323 return 0; 329 return 0;
324} 330}
325 331
@@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
450 456
451 WARN_ON(!(inode->i_state & I_SYNC)); 457 WARN_ON(!(inode->i_state & I_SYNC));
452 458
459 trace_writeback_single_inode_start(inode, wbc, nr_to_write);
460
453 ret = do_writepages(mapping, wbc); 461 ret = do_writepages(mapping, wbc);
454 462
455 /* 463 /*
@@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1150 * dirty the inode itself 1158 * dirty the inode itself
1151 */ 1159 */
1152 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 1160 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
1161 trace_writeback_dirty_inode_start(inode, flags);
1162
1153 if (sb->s_op->dirty_inode) 1163 if (sb->s_op->dirty_inode)
1154 sb->s_op->dirty_inode(inode, flags); 1164 sb->s_op->dirty_inode(inode, flags);
1165
1166 trace_writeback_dirty_inode(inode, flags);
1155 } 1167 }
1156 1168
1157 /* 1169 /*
@@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
1332EXPORT_SYMBOL(writeback_inodes_sb); 1344EXPORT_SYMBOL(writeback_inodes_sb);
1333 1345
1334/** 1346/**
1335 * writeback_inodes_sb_if_idle - start writeback if none underway 1347 * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
1336 * @sb: the superblock 1348 * @sb: the superblock
1337 * @reason: reason why some writeback work was initiated 1349 * @nr: the number of pages to write
1350 * @reason: the reason of writeback
1338 * 1351 *
1339 * Invoke writeback_inodes_sb if no writeback is currently underway. 1352 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
1340 * Returns 1 if writeback was started, 0 if not. 1353 * Returns 1 if writeback was started, 0 if not.
1341 */ 1354 */
1342int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) 1355int try_to_writeback_inodes_sb_nr(struct super_block *sb,
1356 unsigned long nr,
1357 enum wb_reason reason)
1343{ 1358{
1344 if (!writeback_in_progress(sb->s_bdi)) { 1359 if (writeback_in_progress(sb->s_bdi))
1345 down_read(&sb->s_umount);
1346 writeback_inodes_sb(sb, reason);
1347 up_read(&sb->s_umount);
1348 return 1; 1360 return 1;
1349 } else 1361
1362 if (!down_read_trylock(&sb->s_umount))
1350 return 0; 1363 return 0;
1364
1365 writeback_inodes_sb_nr(sb, nr, reason);
1366 up_read(&sb->s_umount);
1367 return 1;
1351} 1368}
1352EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1369EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
1353 1370
1354/** 1371/**
1355 * writeback_inodes_sb_nr_if_idle - start writeback if none underway 1372 * try_to_writeback_inodes_sb - try to start writeback if none underway
1356 * @sb: the superblock 1373 * @sb: the superblock
1357 * @nr: the number of pages to write
1358 * @reason: reason why some writeback work was initiated 1374 * @reason: reason why some writeback work was initiated
1359 * 1375 *
 1360 * Invoke writeback_inodes_sb if no writeback is currently underway. 1376 * Implemented via try_to_writeback_inodes_sb_nr()
1361 * Returns 1 if writeback was started, 0 if not. 1377 * Returns 1 if writeback was started, 0 if not.
1362 */ 1378 */
1363int writeback_inodes_sb_nr_if_idle(struct super_block *sb, 1379int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
1364 unsigned long nr,
1365 enum wb_reason reason)
1366{ 1380{
1367 if (!writeback_in_progress(sb->s_bdi)) { 1381 return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
1368 down_read(&sb->s_umount);
1369 writeback_inodes_sb_nr(sb, nr, reason);
1370 up_read(&sb->s_umount);
1371 return 1;
1372 } else
1373 return 0;
1374} 1382}
1375EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); 1383EXPORT_SYMBOL(try_to_writeback_inodes_sb);
1376 1384
1377/** 1385/**
1378 * sync_inodes_sb - sync sb inode pages 1386 * sync_inodes_sb - sync sb inode pages
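The renamed helpers differ from the old *_if_idle variants mainly in taking s_umount with a trylock, so a caller can no longer block on it; the return value is 1 when writeback is already in progress or was just started, and 0 only when the lock could not be taken. A filesystem that wants to nudge writeback when it runs low on space might call it roughly as follows (the WB_REASON value is only an example, not mandated by the patch):

    int started;

    started = try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
    if (!started) {
            /* s_umount was contended; nothing was queued, so fall back
             * (e.g. retry later or force a transaction commit) */
    }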
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 8dcb114758e3..e2cba1f60c21 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache,
237 struct fscache_cookie *cookie) 237 struct fscache_cookie *cookie)
238{ 238{
239 struct fscache_object *object; 239 struct fscache_object *object;
240 struct hlist_node *_n;
241 int ret; 240 int ret;
242 241
243 _enter("%p,%p{%s}", cache, cookie, cookie->def->name); 242 _enter("%p,%p{%s}", cache, cookie, cookie->def->name);
244 243
245 spin_lock(&cookie->lock); 244 spin_lock(&cookie->lock);
246 hlist_for_each_entry(object, _n, &cookie->backing_objects, 245 hlist_for_each_entry(object, &cookie->backing_objects,
247 cookie_link) { 246 cookie_link) {
248 if (object->cache == cache) 247 if (object->cache == cache)
249 goto object_already_extant; 248 goto object_already_extant;
@@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
311{ 310{
312 struct fscache_object *p; 311 struct fscache_object *p;
313 struct fscache_cache *cache = object->cache; 312 struct fscache_cache *cache = object->cache;
314 struct hlist_node *_n;
315 int ret; 313 int ret;
316 314
317 _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); 315 _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
@@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
321 /* there may be multiple initial creations of this object, but we only 319 /* there may be multiple initial creations of this object, but we only
322 * want one */ 320 * want one */
323 ret = -EEXIST; 321 ret = -EEXIST;
324 hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { 322 hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) {
325 if (p->cache == object->cache) { 323 if (p->cache == object->cache) {
326 if (p->state >= FSCACHE_OBJECT_DYING) 324 if (p->state >= FSCACHE_OBJECT_DYING)
327 ret = -ENOBUFS; 325 ret = -ENOBUFS;
@@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
331 329
332 /* pin the parent object */ 330 /* pin the parent object */
333 spin_lock_nested(&cookie->parent->lock, 1); 331 spin_lock_nested(&cookie->parent->lock, 1);
334 hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, 332 hlist_for_each_entry(p, &cookie->parent->backing_objects,
335 cookie_link) { 333 cookie_link) {
336 if (p->cache == object->cache) { 334 if (p->cache == object->cache) {
337 if (p->state >= FSCACHE_OBJECT_DYING) { 335 if (p->state >= FSCACHE_OBJECT_DYING) {
@@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
435void __fscache_update_cookie(struct fscache_cookie *cookie) 433void __fscache_update_cookie(struct fscache_cookie *cookie)
436{ 434{
437 struct fscache_object *object; 435 struct fscache_object *object;
438 struct hlist_node *_p;
439 436
440 fscache_stat(&fscache_n_updates); 437 fscache_stat(&fscache_n_updates);
441 438
@@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
452 spin_lock(&cookie->lock); 449 spin_lock(&cookie->lock);
453 450
454 /* update the index entry on disk in each cache backing this cookie */ 451 /* update the index entry on disk in each cache backing this cookie */
455 hlist_for_each_entry(object, _p, 452 hlist_for_each_entry(object,
456 &cookie->backing_objects, cookie_link) { 453 &cookie->backing_objects, cookie_link) {
457 fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); 454 fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
458 } 455 }
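These fscache hunks are part of the tree-wide change that drops the separate struct hlist_node cursor from hlist_for_each_entry(): the iterator now takes just the typed cursor, the list head, and the member name. A minimal sketch of the new calling convention, with hypothetical types:

    #include <linux/list.h>

    struct item {
            int key;
            struct hlist_node link;
    };

    static struct item *find_item(struct hlist_head *head, int key)
    {
            struct item *it;

            /* no struct hlist_node *_n cursor any more */
            hlist_for_each_entry(it, head, link)
                    if (it->key == key)
                            return it;
            return NULL;
    }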
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94eda..1b2f6c2c3aaf 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
4 With FUSE it is possible to implement a fully functional filesystem 4 With FUSE it is possible to implement a fully functional filesystem
5 in a userspace program. 5 in a userspace program.
6 6
7 There's also companion library: libfuse. This library along with 7 There's also a companion library: libfuse2. This library is available
8 utilities is available from the FUSE homepage: 8 from the FUSE homepage:
9 <http://fuse.sourceforge.net/> 9 <http://fuse.sourceforge.net/>
10 although chances are your distribution already has that library
11 installed if you've installed the "fuse" package itself.
10 12
11 See <file:Documentation/filesystems/fuse.txt> for more information. 13 See <file:Documentation/filesystems/fuse.txt> for more information.
12 See <file:Documentation/Changes> for needed library/utility version. 14 See <file:Documentation/Changes> for needed library/utility version.
13 15
14 If you want to develop a userspace FS, or if you want to use 16 If you want to develop a userspace FS, or if you want to use
15 a filesystem based on FUSE, answer Y or M. 17 a filesystem based on FUSE, answer Y or M.
18
19config CUSE
20 tristate "Character device in Userspace support"
21 depends on FUSE_FS
22 help
23 This FUSE extension allows character devices to be
24 implemented in userspace.
25
26 If you want to develop or use a userspace character device
27 based on CUSE, answer Y or M.
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 75a20c092dd4..b7978b9f75ef 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
23{ 23{
24 struct fuse_conn *fc; 24 struct fuse_conn *fc;
25 mutex_lock(&fuse_mutex); 25 mutex_lock(&fuse_mutex);
26 fc = file->f_path.dentry->d_inode->i_private; 26 fc = file_inode(file)->i_private;
27 if (fc) 27 if (fc)
28 fc = fuse_conn_get(fc); 28 fc = fuse_conn_get(fc);
29 mutex_unlock(&fuse_mutex); 29 mutex_unlock(&fuse_mutex);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index ee8d55042298..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -45,7 +45,6 @@
45#include <linux/miscdevice.h> 45#include <linux/miscdevice.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/spinlock.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
51 50
@@ -63,7 +62,7 @@ struct cuse_conn {
63 bool unrestricted_ioctl; 62 bool unrestricted_ioctl;
64}; 63};
65 64
66static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ 65static DEFINE_MUTEX(cuse_lock); /* protects registration */
67static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; 66static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
68static struct class *cuse_class; 67static struct class *cuse_class;
69 68
@@ -92,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
92 loff_t *ppos) 91 loff_t *ppos)
93{ 92{
94 loff_t pos = 0; 93 loff_t pos = 0;
94 struct iovec iov = { .iov_base = buf, .iov_len = count };
95 95
96 return fuse_direct_io(file, buf, count, &pos, 0); 96 return fuse_direct_io(file, &iov, 1, count, &pos, 0);
97} 97}
98 98
99static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write(struct file *file, const char __user *buf,
100 size_t count, loff_t *ppos) 100 size_t count, loff_t *ppos)
101{ 101{
102 loff_t pos = 0; 102 loff_t pos = 0;
103 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
104
103 /* 105 /*
104 * No locking or generic_write_checks(), the server is 106 * No locking or generic_write_checks(), the server is
105 * responsible for locking and sanity checks. 107 * responsible for locking and sanity checks.
106 */ 108 */
107 return fuse_direct_io(file, buf, count, &pos, 1); 109 return fuse_direct_io(file, &iov, 1, count, &pos, 1);
108} 110}
109 111
110static int cuse_open(struct inode *inode, struct file *file) 112static int cuse_open(struct inode *inode, struct file *file)
@@ -114,14 +116,14 @@ static int cuse_open(struct inode *inode, struct file *file)
114 int rc; 116 int rc;
115 117
116 /* look up and get the connection */ 118 /* look up and get the connection */
117 spin_lock(&cuse_lock); 119 mutex_lock(&cuse_lock);
118 list_for_each_entry(pos, cuse_conntbl_head(devt), list) 120 list_for_each_entry(pos, cuse_conntbl_head(devt), list)
119 if (pos->dev->devt == devt) { 121 if (pos->dev->devt == devt) {
120 fuse_conn_get(&pos->fc); 122 fuse_conn_get(&pos->fc);
121 cc = pos; 123 cc = pos;
122 break; 124 break;
123 } 125 }
124 spin_unlock(&cuse_lock); 126 mutex_unlock(&cuse_lock);
125 127
126 /* dead? */ 128 /* dead? */
127 if (!cc) 129 if (!cc)
@@ -267,7 +269,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
267static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) 269static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
268{ 270{
269 char *end = p + len; 271 char *end = p + len;
270 char *key, *val; 272 char *uninitialized_var(key), *uninitialized_var(val);
271 int rc; 273 int rc;
272 274
273 while (true) { 275 while (true) {
@@ -305,14 +307,14 @@ static void cuse_gendev_release(struct device *dev)
305 */ 307 */
306static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 308static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
307{ 309{
308 struct cuse_conn *cc = fc_to_cc(fc); 310 struct cuse_conn *cc = fc_to_cc(fc), *pos;
309 struct cuse_init_out *arg = req->out.args[0].value; 311 struct cuse_init_out *arg = req->out.args[0].value;
310 struct page *page = req->pages[0]; 312 struct page *page = req->pages[0];
311 struct cuse_devinfo devinfo = { }; 313 struct cuse_devinfo devinfo = { };
312 struct device *dev; 314 struct device *dev;
313 struct cdev *cdev; 315 struct cdev *cdev;
314 dev_t devt; 316 dev_t devt;
315 int rc; 317 int rc, i;
316 318
317 if (req->out.h.error || 319 if (req->out.h.error ||
318 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { 320 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -356,15 +358,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
356 dev_set_drvdata(dev, cc); 358 dev_set_drvdata(dev, cc);
357 dev_set_name(dev, "%s", devinfo.name); 359 dev_set_name(dev, "%s", devinfo.name);
358 360
361 mutex_lock(&cuse_lock);
362
363 /* make sure the device-name is unique */
364 for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
365 list_for_each_entry(pos, &cuse_conntbl[i], list)
366 if (!strcmp(dev_name(pos->dev), dev_name(dev)))
367 goto err_unlock;
368 }
369
359 rc = device_add(dev); 370 rc = device_add(dev);
360 if (rc) 371 if (rc)
361 goto err_device; 372 goto err_unlock;
362 373
363 /* register cdev */ 374 /* register cdev */
364 rc = -ENOMEM; 375 rc = -ENOMEM;
365 cdev = cdev_alloc(); 376 cdev = cdev_alloc();
366 if (!cdev) 377 if (!cdev)
367 goto err_device; 378 goto err_unlock;
368 379
369 cdev->owner = THIS_MODULE; 380 cdev->owner = THIS_MODULE;
370 cdev->ops = &cuse_frontend_fops; 381 cdev->ops = &cuse_frontend_fops;
@@ -377,9 +388,8 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
377 cc->cdev = cdev; 388 cc->cdev = cdev;
378 389
379 /* make the device available */ 390 /* make the device available */
380 spin_lock(&cuse_lock);
381 list_add(&cc->list, cuse_conntbl_head(devt)); 391 list_add(&cc->list, cuse_conntbl_head(devt));
382 spin_unlock(&cuse_lock); 392 mutex_unlock(&cuse_lock);
383 393
384 /* announce device availability */ 394 /* announce device availability */
385 dev_set_uevent_suppress(dev, 0); 395 dev_set_uevent_suppress(dev, 0);
@@ -391,7 +401,8 @@ out:
391 401
392err_cdev: 402err_cdev:
393 cdev_del(cdev); 403 cdev_del(cdev);
394err_device: 404err_unlock:
405 mutex_unlock(&cuse_lock);
395 put_device(dev); 406 put_device(dev);
396err_region: 407err_region:
397 unregister_chrdev_region(devt, 1); 408 unregister_chrdev_region(devt, 1);
@@ -411,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
411 422
412 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 423 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
413 424
414 req = fuse_get_req(fc); 425 req = fuse_get_req(fc, 1);
415 if (IS_ERR(req)) { 426 if (IS_ERR(req)) {
416 rc = PTR_ERR(req); 427 rc = PTR_ERR(req);
417 goto err; 428 goto err;
@@ -441,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
441 req->out.argvar = 1; 452 req->out.argvar = 1;
442 req->out.argpages = 1; 453 req->out.argpages = 1;
443 req->pages[0] = page; 454 req->pages[0] = page;
455 req->page_descs[0].length = req->out.args[1].size;
444 req->num_pages = 1; 456 req->num_pages = 1;
445 req->end = cuse_process_init_reply; 457 req->end = cuse_process_init_reply;
446 fuse_request_send_background(fc, req); 458 fuse_request_send_background(fc, req);
@@ -520,9 +532,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
520 int rc; 532 int rc;
521 533
522 /* remove from the conntbl, no more access from this point on */ 534 /* remove from the conntbl, no more access from this point on */
523 spin_lock(&cuse_lock); 535 mutex_lock(&cuse_lock);
524 list_del_init(&cc->list); 536 list_del_init(&cc->list);
525 spin_unlock(&cuse_lock); 537 mutex_unlock(&cuse_lock);
526 538
527 /* remove device */ 539 /* remove device */
528 if (cc->dev) 540 if (cc->dev)
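The cuse.c changes above do two things: cuse_lock becomes a mutex so it can be held across device_add(), which may sleep, and the init-reply path now rejects a second CUSE device with the same name before registering it. A minimal sketch of that duplicate-name scan, assuming it lives in fs/fuse/cuse.c next to the existing cuse_conntbl[CUSE_CONNTBL_LEN] hash of connection lists and that each cuse_conn keeps its struct device in cc->dev (the helper name is illustrative only):

/* Sketch only: the uniqueness check factored out of
 * cuse_process_init_reply(); caller must hold cuse_lock. */
static bool cuse_name_is_unique(const char *name)
{
	struct cuse_conn *pos;
	int i;

	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
		list_for_each_entry(pos, &cuse_conntbl[i], list)
			if (!strcmp(dev_name(pos->dev), name))
				return false;	/* name already registered */
	return true;
}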
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c16335315e5d..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
34 return file->private_data; 34 return file->private_data;
35} 35}
36 36
37static void fuse_request_init(struct fuse_req *req) 37static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
38{ 40{
39 memset(req, 0, sizeof(*req)); 41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
40 INIT_LIST_HEAD(&req->list); 44 INIT_LIST_HEAD(&req->list);
41 INIT_LIST_HEAD(&req->intr_entry); 45 INIT_LIST_HEAD(&req->intr_entry);
42 init_waitqueue_head(&req->waitq); 46 init_waitqueue_head(&req->waitq);
43 atomic_set(&req->count, 1); 47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
44} 51}
45 52
46struct fuse_req *fuse_request_alloc(void) 53static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
47{ 54{
48 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); 55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
49 if (req) 56 if (req) {
50 fuse_request_init(req); 57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
51 return req; 78 return req;
52} 79}
80
81struct fuse_req *fuse_request_alloc(unsigned npages)
82{
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84}
53EXPORT_SYMBOL_GPL(fuse_request_alloc); 85EXPORT_SYMBOL_GPL(fuse_request_alloc);
54 86
55struct fuse_req *fuse_request_alloc_nofs(void) 87struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
56{ 88{
57 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); 89 return __fuse_request_alloc(npages, GFP_NOFS);
58 if (req)
59 fuse_request_init(req);
60 return req;
61} 90}
62 91
63void fuse_request_free(struct fuse_req *req) 92void fuse_request_free(struct fuse_req *req)
64{ 93{
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
65 kmem_cache_free(fuse_req_cachep, req); 98 kmem_cache_free(fuse_req_cachep, req);
66} 99}
67 100
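The allocator rework above sizes the page vector per request: fuse_request_init() is handed the pages and page-descriptor arrays plus their capacity, __fuse_request_alloc() picks either the arrays embedded in struct fuse_req (when npages <= FUSE_REQ_INLINE_PAGES) or two kmalloc()ed ones, and fuse_request_free() releases only the out-of-line case. A usage sketch built solely from the functions shown in this hunk:

/* Sketch: a request that will carry four pages.  Four exceeds
 * FUSE_REQ_INLINE_PAGES (1), so both vectors come from kmalloc(). */
struct fuse_req *req = fuse_request_alloc(4);

if (!req)
	return -ENOMEM;			/* allocation may fail */
/* ... fill req->pages[0..3] and req->page_descs[0..3] ... */
fuse_request_free(req);			/* also frees the out-of-line vectors */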
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
97 req->in.h.pid = current->pid; 130 req->in.h.pid = current->pid;
98} 131}
99 132
100struct fuse_req *fuse_get_req(struct fuse_conn *fc) 133struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
101{ 134{
102 struct fuse_req *req; 135 struct fuse_req *req;
103 sigset_t oldset; 136 sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
116 if (!fc->connected) 149 if (!fc->connected)
117 goto out; 150 goto out;
118 151
119 req = fuse_request_alloc(); 152 req = fuse_request_alloc(npages);
120 err = -ENOMEM; 153 err = -ENOMEM;
121 if (!req) 154 if (!req)
122 goto out; 155 goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
165 struct fuse_file *ff = file->private_data; 198 struct fuse_file *ff = file->private_data;
166 199
167 spin_lock(&fc->lock); 200 spin_lock(&fc->lock);
168 fuse_request_init(req); 201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
169 BUG_ON(ff->reserved_req); 202 BUG_ON(ff->reserved_req);
170 ff->reserved_req = req; 203 ff->reserved_req = req;
171 wake_up_all(&fc->reserved_req_waitq); 204 wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
186 * filesystem should not have its own file open. If deadlock is 219
187 * intentional, it can still be broken by "aborting" the filesystem. 220 * intentional, it can still be broken by "aborting" the filesystem.
188 */ 221 */
189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) 222struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
190{ 224{
191 struct fuse_req *req; 225 struct fuse_req *req;
192 226
193 atomic_inc(&fc->num_waiting); 227 atomic_inc(&fc->num_waiting);
194 wait_event(fc->blocked_waitq, !fc->blocked); 228 wait_event(fc->blocked_waitq, !fc->blocked);
195 req = fuse_request_alloc(); 229 req = fuse_request_alloc(0);
196 if (!req) 230 if (!req)
197 req = get_reserved_req(fc, file); 231 req = get_reserved_req(fc, file);
198 232
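fuse_get_req() now takes the number of page slots the caller intends to use, and fuse_get_req_nopages()/fuse_get_req_nofail_nopages() cover the page-less cases. The single-page pattern that cuse_send_init(), fuse_readpage() and fuse_readdir() switch to elsewhere in this series looks like this (a sketch of the call sequence, not a new API):

req = fuse_get_req(fc, 1);		/* reserve one page slot */
if (IS_ERR(req))
	return PTR_ERR(req);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;	/* per-page length must now be set */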
@@ -406,9 +440,8 @@ __acquires(fc->lock)
406 } 440 }
407} 441}
408 442
409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 443static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410{ 444{
411 req->isreply = 1;
412 spin_lock(&fc->lock); 445 spin_lock(&fc->lock);
413 if (!fc->connected) 446 if (!fc->connected)
414 req->out.h.error = -ENOTCONN; 447 req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
425 } 458 }
426 spin_unlock(&fc->lock); 459 spin_unlock(&fc->lock);
427} 460}
461
462void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463{
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466}
428EXPORT_SYMBOL_GPL(fuse_request_send); 467EXPORT_SYMBOL_GPL(fuse_request_send);
429 468
430static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 469static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491 fuse_request_send_nowait_locked(fc, req); 530 fuse_request_send_nowait_locked(fc, req);
492} 531}
493 532
533void fuse_force_forget(struct file *file, u64 nodeid)
534{
535 struct inode *inode = file->f_path.dentry->d_inode;
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552}
553
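fuse_force_forget() builds a FUSE_FORGET with nlookup = 1 and pushes it through __fuse_request_send() with isreply = 0, so no reply is awaited and any error is ignored. Its one caller in this patch is the readdirplus parser, which uses it to drop the lookup reference taken for an entry it could not link:

/* From parse_dirplusfile() later in this patch: undo the implicit
 * lookup when linking a READDIRPLUS entry fails. */
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
	fuse_force_forget(file, direntplus->entry_out.nodeid);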
494/* 554/*
495 * Lock the request. Up to the next unlock_request() there mustn't be 555 * Lock the request. Up to the next unlock_request() there mustn't be
496 * anything that could cause a page-fault. If the request was already 556 * anything that could cause a page-fault. If the request was already
@@ -692,8 +752,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
692 struct page *oldpage = *pagep; 752 struct page *oldpage = *pagep;
693 struct page *newpage; 753 struct page *newpage;
694 struct pipe_buffer *buf = cs->pipebufs; 754 struct pipe_buffer *buf = cs->pipebufs;
695 struct address_space *mapping;
696 pgoff_t index;
697 755
698 unlock_request(cs->fc, cs->req); 756 unlock_request(cs->fc, cs->req);
699 fuse_copy_finish(cs); 757 fuse_copy_finish(cs);
@@ -724,9 +782,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
724 if (fuse_check_page(newpage) != 0) 782 if (fuse_check_page(newpage) != 0)
725 goto out_fallback_unlock; 783 goto out_fallback_unlock;
726 784
727 mapping = oldpage->mapping;
728 index = oldpage->index;
729
730 /* 785 /*
731 * This is a new and locked page, it shouldn't be mapped or 786 * This is a new and locked page, it shouldn't be mapped or
732 * have any special flags on it 787 * have any special flags on it
@@ -855,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
855{ 910{
856 unsigned i; 911 unsigned i;
857 struct fuse_req *req = cs->req; 912 struct fuse_req *req = cs->req;
858 unsigned offset = req->page_offset;
859 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
860 913
861 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
862 int err; 915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
863 918
864 err = fuse_copy_page(cs, &req->pages[i], offset, count, 919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
865 zeroing); 920 zeroing);
@@ -867,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
867 return err; 922 return err;
868 923
869 nbytes -= count; 924 nbytes -= count;
870 count = min(nbytes, (unsigned) PAGE_SIZE);
871 offset = 0;
872 } 925 }
873 return 0; 926 return 0;
874} 927}
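With the single req->page_offset gone, fuse_copy_pages() reads an (offset, length) pair per page from req->page_descs[]. A sketch of how a producer fills those descriptors for a payload of len bytes starting off bytes into the first page; npages, len and off are placeholders, and descriptors past the first keep the zero offset left by fuse_request_init():

unsigned int i, rem = len;

req->page_descs[0].offset = off;
for (i = 0; i < npages; i++) {
	unsigned int space = PAGE_SIZE - (i == 0 ? off : 0);

	req->page_descs[i].length = min_t(unsigned int, rem, space);
	rem -= req->page_descs[i].length;
}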
@@ -1541,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1541 unsigned int num; 1594 unsigned int num;
1542 unsigned int offset; 1595 unsigned int offset;
1543 size_t total_len = 0; 1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1544 1607
1545 req = fuse_get_req(fc); 1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611 req = fuse_get_req(fc, num_pages);
1546 if (IS_ERR(req)) 1612 if (IS_ERR(req))
1547 return PTR_ERR(req); 1613 return PTR_ERR(req);
1548 1614
1549 offset = outarg->offset & ~PAGE_CACHE_MASK;
1550
1551 req->in.h.opcode = FUSE_NOTIFY_REPLY; 1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552 req->in.h.nodeid = outarg->nodeid; 1616 req->in.h.nodeid = outarg->nodeid;
1553 req->in.numargs = 2; 1617 req->in.numargs = 2;
1554 req->in.argpages = 1; 1618 req->in.argpages = 1;
1555 req->page_offset = offset; 1619 req->page_descs[0].offset = offset;
1556 req->end = fuse_retrieve_end; 1620 req->end = fuse_retrieve_end;
1557 1621
1558 index = outarg->offset >> PAGE_CACHE_SHIFT; 1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1559 file_size = i_size_read(inode);
1560 num = outarg->size;
1561 if (outarg->offset > file_size)
1562 num = 0;
1563 else if (outarg->offset + num > file_size)
1564 num = file_size - outarg->offset;
1565 1623
1566 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { 1624 while (num && req->num_pages < num_pages) {
1567 struct page *page; 1625 struct page *page;
1568 unsigned int this_num; 1626 unsigned int this_num;
1569 1627
@@ -1573,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1573 1631
1574 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); 1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575 req->pages[req->num_pages] = page; 1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1576 req->num_pages++; 1635 req->num_pages++;
1577 1636
1578 offset = 0; 1637 offset = 0;
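fuse_retrieve() now clamps the requested size against i_size before allocating and derives the request size from the byte range itself: the page count is the number of pages touched by [offset, offset + num), capped at FUSE_MAX_PAGES_PER_REQ. A worked instance of that rounding, assuming 4 KiB pages:

/* num = 6000 bytes starting offset = 3000 into the first page:
 *   num + offset + PAGE_SIZE - 1 = 6000 + 3000 + 4095 = 13095
 *   13095 >> PAGE_SHIFT (12)     = 3   -> the range spans pages 0..2
 */
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);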
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..ff15522481d4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
18{
19 struct fuse_conn *fc = get_fuse_conn(dir);
20 struct fuse_inode *fi = get_fuse_inode(dir);
21
22 if (!fc->do_readdirplus)
23 return false;
24 if (!fc->readdirplus_auto)
25 return true;
26 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 return true;
28 if (filp->f_pos == 0)
29 return true;
30 return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35 struct fuse_inode *fi = get_fuse_inode(dir);
36
37 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
17#if BITS_PER_LONG >= 64 40#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{ 42{
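fuse_use_readdirplus() encodes the policy: never without server support (do_readdirplus), always when readdirplus_auto is off, otherwise only for the first read of a directory (f_pos == 0) or when a prior lookup/revalidate flagged the directory via fuse_advise_use_readdirplus(). The flag is consumed exactly once because the reader uses test_and_clear_bit(); a sketch of that pairing on the directory inode's new state word:

/* lookup/revalidate path: hint that a readdir is likely to follow */
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);

/* readdir path: consume the hint atomically, at most once */
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
	return true;			/* use READDIRPLUS this time */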
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178 return -ECHILD; 201 return -ECHILD;
179 202
180 fc = get_fuse_conn(inode); 203 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 204 req = fuse_get_req_nopages(fc);
182 if (IS_ERR(req)) 205 if (IS_ERR(req))
183 return 0; 206 return 0;
184 207
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
219 attr_version); 242 attr_version);
220 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
221 } 244 }
245 fuse_advise_use_readdirplus(inode);
222 return 1; 246 return 1;
223} 247}
224 248
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
271 if (name->len > FUSE_NAME_MAX) 295 if (name->len > FUSE_NAME_MAX)
272 goto out; 296 goto out;
273 297
274 req = fuse_get_req(fc); 298 req = fuse_get_req_nopages(fc);
275 err = PTR_ERR(req); 299 err = PTR_ERR(req);
276 if (IS_ERR(req)) 300 if (IS_ERR(req))
277 goto out; 301 goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
355 else 379 else
356 fuse_invalidate_entry_cache(entry); 380 fuse_invalidate_entry_cache(entry);
357 381
382 fuse_advise_use_readdirplus(dir);
358 return newent; 383 return newent;
359 384
360 out_iput: 385 out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
391 if (!forget) 416 if (!forget)
392 goto out_err; 417 goto out_err;
393 418
394 req = fuse_get_req(fc); 419 req = fuse_get_req_nopages(fc);
395 err = PTR_ERR(req); 420 err = PTR_ERR(req);
396 if (IS_ERR(req)) 421 if (IS_ERR(req))
397 goto out_put_forget_req; 422 goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
592{ 617{
593 struct fuse_mknod_in inarg; 618 struct fuse_mknod_in inarg;
594 struct fuse_conn *fc = get_fuse_conn(dir); 619 struct fuse_conn *fc = get_fuse_conn(dir);
595 struct fuse_req *req = fuse_get_req(fc); 620 struct fuse_req *req = fuse_get_req_nopages(fc);
596 if (IS_ERR(req)) 621 if (IS_ERR(req))
597 return PTR_ERR(req); 622 return PTR_ERR(req);
598 623
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
623{ 648{
624 struct fuse_mkdir_in inarg; 649 struct fuse_mkdir_in inarg;
625 struct fuse_conn *fc = get_fuse_conn(dir); 650 struct fuse_conn *fc = get_fuse_conn(dir);
626 struct fuse_req *req = fuse_get_req(fc); 651 struct fuse_req *req = fuse_get_req_nopages(fc);
627 if (IS_ERR(req)) 652 if (IS_ERR(req))
628 return PTR_ERR(req); 653 return PTR_ERR(req);
629 654
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
647{ 672{
648 struct fuse_conn *fc = get_fuse_conn(dir); 673 struct fuse_conn *fc = get_fuse_conn(dir);
649 unsigned len = strlen(link) + 1; 674 unsigned len = strlen(link) + 1;
650 struct fuse_req *req = fuse_get_req(fc); 675 struct fuse_req *req = fuse_get_req_nopages(fc);
651 if (IS_ERR(req)) 676 if (IS_ERR(req))
652 return PTR_ERR(req); 677 return PTR_ERR(req);
653 678
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
664{ 689{
665 int err; 690 int err;
666 struct fuse_conn *fc = get_fuse_conn(dir); 691 struct fuse_conn *fc = get_fuse_conn(dir);
667 struct fuse_req *req = fuse_get_req(fc); 692 struct fuse_req *req = fuse_get_req_nopages(fc);
668 if (IS_ERR(req)) 693 if (IS_ERR(req))
669 return PTR_ERR(req); 694 return PTR_ERR(req);
670 695
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
682 707
683 spin_lock(&fc->lock); 708 spin_lock(&fc->lock);
684 fi->attr_version = ++fc->attr_version; 709 fi->attr_version = ++fc->attr_version;
685 drop_nlink(inode); 710 /*
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
714 * condition here
715 */
716 if (inode->i_nlink > 0)
717 drop_nlink(inode);
686 spin_unlock(&fc->lock); 718 spin_unlock(&fc->lock);
687 fuse_invalidate_attr(inode); 719 fuse_invalidate_attr(inode);
688 fuse_invalidate_attr(dir); 720 fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
696{ 728{
697 int err; 729 int err;
698 struct fuse_conn *fc = get_fuse_conn(dir); 730 struct fuse_conn *fc = get_fuse_conn(dir);
699 struct fuse_req *req = fuse_get_req(fc); 731 struct fuse_req *req = fuse_get_req_nopages(fc);
700 if (IS_ERR(req)) 732 if (IS_ERR(req))
701 return PTR_ERR(req); 733 return PTR_ERR(req);
702 734
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
723 int err; 755 int err;
724 struct fuse_rename_in inarg; 756 struct fuse_rename_in inarg;
725 struct fuse_conn *fc = get_fuse_conn(olddir); 757 struct fuse_conn *fc = get_fuse_conn(olddir);
726 struct fuse_req *req = fuse_get_req(fc); 758 struct fuse_req *req = fuse_get_req_nopages(fc);
727 759
728 if (IS_ERR(req)) 760 if (IS_ERR(req))
729 return PTR_ERR(req); 761 return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
776 struct fuse_link_in inarg; 808 struct fuse_link_in inarg;
777 struct inode *inode = entry->d_inode; 809 struct inode *inode = entry->d_inode;
778 struct fuse_conn *fc = get_fuse_conn(inode); 810 struct fuse_conn *fc = get_fuse_conn(inode);
779 struct fuse_req *req = fuse_get_req(fc); 811 struct fuse_req *req = fuse_get_req_nopages(fc);
780 if (IS_ERR(req)) 812 if (IS_ERR(req))
781 return PTR_ERR(req); 813 return PTR_ERR(req);
782 814
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
848 struct fuse_req *req; 880 struct fuse_req *req;
849 u64 attr_version; 881 u64 attr_version;
850 882
851 req = fuse_get_req(fc); 883 req = fuse_get_req_nopages(fc);
852 if (IS_ERR(req)) 884 if (IS_ERR(req))
853 return PTR_ERR(req); 885 return PTR_ERR(req);
854 886
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
985 1017
986/* 1018/*
987 * Calling into a user-controlled filesystem gives the filesystem 1019 * Calling into a user-controlled filesystem gives the filesystem
988 * daemon ptrace-like capabilities over the requester process. This 1020 * daemon ptrace-like capabilities over the current process. This
989 * means, that the filesystem daemon is able to record the exact 1021 * means, that the filesystem daemon is able to record the exact
990 * filesystem operations performed, and can also control the behavior 1022 * filesystem operations performed, and can also control the behavior
991 * of the requester process in otherwise impossible ways. For example 1023 * of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 * for which the owner of the mount has ptrace privilege. This 1028 * for which the owner of the mount has ptrace privilege. This
997 * excludes processes started by other users, suid or sgid processes. 1029 * excludes processes started by other users, suid or sgid processes.
998 */ 1030 */
999int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 1031int fuse_allow_current_process(struct fuse_conn *fc)
1000{ 1032{
1001 const struct cred *cred; 1033 const struct cred *cred;
1002 int ret;
1003 1034
1004 if (fc->flags & FUSE_ALLOW_OTHER) 1035 if (fc->flags & FUSE_ALLOW_OTHER)
1005 return 1; 1036 return 1;
1006 1037
1007 rcu_read_lock(); 1038 cred = current_cred();
1008 ret = 0;
1009 cred = __task_cred(task);
1010 if (uid_eq(cred->euid, fc->user_id) && 1039 if (uid_eq(cred->euid, fc->user_id) &&
1011 uid_eq(cred->suid, fc->user_id) && 1040 uid_eq(cred->suid, fc->user_id) &&
1012 uid_eq(cred->uid, fc->user_id) && 1041 uid_eq(cred->uid, fc->user_id) &&
1013 gid_eq(cred->egid, fc->group_id) && 1042 gid_eq(cred->egid, fc->group_id) &&
1014 gid_eq(cred->sgid, fc->group_id) && 1043 gid_eq(cred->sgid, fc->group_id) &&
1015 gid_eq(cred->gid, fc->group_id)) 1044 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1045 return 1;
1017 rcu_read_unlock();
1018 1046
1019 return ret; 1047 return 0;
1020} 1048}
1021 1049
1022static int fuse_access(struct inode *inode, int mask) 1050static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
1029 if (fc->no_access) 1057 if (fc->no_access)
1030 return 0; 1058 return 0;
1031 1059
1032 req = fuse_get_req(fc); 1060 req = fuse_get_req_nopages(fc);
1033 if (IS_ERR(req)) 1061 if (IS_ERR(req))
1034 return PTR_ERR(req); 1062 return PTR_ERR(req);
1035 1063
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
1077 bool refreshed = false; 1105 bool refreshed = false;
1078 int err = 0; 1106 int err = 0;
1079 1107
1080 if (!fuse_allow_task(fc, current)) 1108 if (!fuse_allow_current_process(fc))
1081 return -EACCES; 1109 return -EACCES;
1082 1110
1083 /* 1111 /*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1155 return 0; 1183 return 0;
1156} 1184}
1157 1185
1158static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 1186static int fuse_direntplus_link(struct file *file,
1187 struct fuse_direntplus *direntplus,
1188 u64 attr_version)
1159{ 1189{
1160 int err; 1190 int err;
1191 struct fuse_entry_out *o = &direntplus->entry_out;
1192 struct fuse_dirent *dirent = &direntplus->dirent;
1193 struct dentry *parent = file->f_path.dentry;
1194 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1195 struct dentry *dentry;
1196 struct dentry *alias;
1197 struct inode *dir = parent->d_inode;
1198 struct fuse_conn *fc;
1199 struct inode *inode;
1200
1201 if (!o->nodeid) {
1202 /*
1203 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1204 * ENOENT. Instead, it only means the userspace filesystem did
1205 * not want to return attributes/handle for this entry.
1206 *
1207 * So do nothing.
1208 */
1209 return 0;
1210 }
1211
1212 if (name.name[0] == '.') {
1213 /*
1214 * We could potentially refresh the attributes of the directory
1215 * and its parent?
1216 */
1217 if (name.len == 1)
1218 return 0;
1219 if (name.name[1] == '.' && name.len == 2)
1220 return 0;
1221 }
1222 fc = get_fuse_conn(dir);
1223
1224 name.hash = full_name_hash(name.name, name.len);
1225 dentry = d_lookup(parent, &name);
1226 if (dentry && dentry->d_inode) {
1227 inode = dentry->d_inode;
1228 if (get_node_id(inode) == o->nodeid) {
1229 struct fuse_inode *fi;
1230 fi = get_fuse_inode(inode);
1231 spin_lock(&fc->lock);
1232 fi->nlookup++;
1233 spin_unlock(&fc->lock);
1234
1235 /*
1236 * The other branch to 'found' comes via fuse_iget()
1237 * which bumps nlookup inside
1238 */
1239 goto found;
1240 }
1241 err = d_invalidate(dentry);
1242 if (err)
1243 goto out;
1244 dput(dentry);
1245 dentry = NULL;
1246 }
1247
1248 dentry = d_alloc(parent, &name);
1249 err = -ENOMEM;
1250 if (!dentry)
1251 goto out;
1252
1253 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1254 &o->attr, entry_attr_timeout(o), attr_version);
1255 if (!inode)
1256 goto out;
1257
1258 alias = d_materialise_unique(dentry, inode);
1259 err = PTR_ERR(alias);
1260 if (IS_ERR(alias))
1261 goto out;
1262 if (alias) {
1263 dput(dentry);
1264 dentry = alias;
1265 }
1266
1267found:
1268 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1269 attr_version);
1270
1271 fuse_change_entry_timeout(dentry, o);
1272
1273 err = 0;
1274out:
1275 if (dentry)
1276 dput(dentry);
1277 return err;
1278}
1279
1280static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 void *dstbuf, filldir_t filldir, u64 attr_version)
1282{
1283 struct fuse_direntplus *direntplus;
1284 struct fuse_dirent *dirent;
1285 size_t reclen;
1286 int over = 0;
1287 int ret;
1288
1289 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 direntplus = (struct fuse_direntplus *) buf;
1291 dirent = &direntplus->dirent;
1292 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293
1294 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 return -EIO;
1296 if (reclen > nbytes)
1297 break;
1298
1299 if (!over) {
1300 /* We fill entries into dstbuf only as much as
1301 it can hold. But we still continue iterating
1302 over remaining entries to link them. If not,
1303 we need to send a FORGET for each of those
1304 which we did not link.
1305 */
1306 over = filldir(dstbuf, dirent->name, dirent->namelen,
1307 file->f_pos, dirent->ino,
1308 dirent->type);
1309 file->f_pos = dirent->off;
1310 }
1311
1312 buf += reclen;
1313 nbytes -= reclen;
1314
1315 ret = fuse_direntplus_link(file, direntplus, attr_version);
1316 if (ret)
1317 fuse_force_forget(file, direntplus->entry_out.nodeid);
1318 }
1319
1320 return 0;
1321}
1322
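parse_dirplusfile() walks a buffer of fuse_direntplus records: each one is a fuse_entry_out (nodeid, attributes, timeouts) immediately followed by a fuse_dirent whose name is stored inline and padded to 8-byte alignment, which is what FUSE_NAME_OFFSET_DIRENTPLUS and FUSE_DIRENTPLUS_SIZE() express. An approximate picture of the layout; the authoritative definitions live in include/uapi/linux/fuse.h, not in this diff:

struct fuse_direntplus {
	struct fuse_entry_out entry_out;	/* nodeid, attrs, entry/attr timeouts */
	struct fuse_dirent dirent;		/* ino, off, namelen, type, name[] */
};

/* record length: everything up to dirent.name, plus namelen bytes of
 * name, rounded up to the dirent alignment (8 bytes) */
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);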
1323static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1324{
1325 int plus, err;
1161 size_t nbytes; 1326 size_t nbytes;
1162 struct page *page; 1327 struct page *page;
1163 struct inode *inode = file->f_path.dentry->d_inode; 1328 struct inode *inode = file_inode(file);
1164 struct fuse_conn *fc = get_fuse_conn(inode); 1329 struct fuse_conn *fc = get_fuse_conn(inode);
1165 struct fuse_req *req; 1330 struct fuse_req *req;
1331 u64 attr_version = 0;
1166 1332
1167 if (is_bad_inode(inode)) 1333 if (is_bad_inode(inode))
1168 return -EIO; 1334 return -EIO;
1169 1335
1170 req = fuse_get_req(fc); 1336 req = fuse_get_req(fc, 1);
1171 if (IS_ERR(req)) 1337 if (IS_ERR(req))
1172 return PTR_ERR(req); 1338 return PTR_ERR(req);
1173 1339
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1176 fuse_put_request(fc, req); 1342 fuse_put_request(fc, req);
1177 return -ENOMEM; 1343 return -ENOMEM;
1178 } 1344 }
1345
1346 plus = fuse_use_readdirplus(inode, file);
1179 req->out.argpages = 1; 1347 req->out.argpages = 1;
1180 req->num_pages = 1; 1348 req->num_pages = 1;
1181 req->pages[0] = page; 1349 req->pages[0] = page;
1182 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1350 req->page_descs[0].length = PAGE_SIZE;
1351 if (plus) {
1352 attr_version = fuse_get_attr_version(fc);
1353 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1354 FUSE_READDIRPLUS);
1355 } else {
1356 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1357 FUSE_READDIR);
1358 }
1183 fuse_request_send(fc, req); 1359 fuse_request_send(fc, req);
1184 nbytes = req->out.args[0].size; 1360 nbytes = req->out.args[0].size;
1185 err = req->out.h.error; 1361 err = req->out.h.error;
1186 fuse_put_request(fc, req); 1362 fuse_put_request(fc, req);
1187 if (!err) 1363 if (!err) {
1188 err = parse_dirfile(page_address(page), nbytes, file, dstbuf, 1364 if (plus) {
1189 filldir); 1365 err = parse_dirplusfile(page_address(page), nbytes,
1366 file, dstbuf, filldir,
1367 attr_version);
1368 } else {
1369 err = parse_dirfile(page_address(page), nbytes, file,
1370 dstbuf, filldir);
1371 }
1372 }
1190 1373
1191 __free_page(page); 1374 __free_page(page);
1192 fuse_invalidate_attr(inode); /* atime changed */ 1375 fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
1197{ 1380{
1198 struct inode *inode = dentry->d_inode; 1381 struct inode *inode = dentry->d_inode;
1199 struct fuse_conn *fc = get_fuse_conn(inode); 1382 struct fuse_conn *fc = get_fuse_conn(inode);
1200 struct fuse_req *req = fuse_get_req(fc); 1383 struct fuse_req *req = fuse_get_req_nopages(fc);
1201 char *link; 1384 char *link;
1202 1385
1203 if (IS_ERR(req)) 1386 if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1391 loff_t oldsize; 1574 loff_t oldsize;
1392 int err; 1575 int err;
1393 1576
1394 if (!fuse_allow_task(fc, current)) 1577 if (!fuse_allow_current_process(fc))
1395 return -EACCES; 1578 return -EACCES;
1396 1579
1397 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1580 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1410 if (attr->ia_valid & ATTR_SIZE) 1593 if (attr->ia_valid & ATTR_SIZE)
1411 is_truncate = true; 1594 is_truncate = true;
1412 1595
1413 req = fuse_get_req(fc); 1596 req = fuse_get_req_nopages(fc);
1414 if (IS_ERR(req)) 1597 if (IS_ERR(req))
1415 return PTR_ERR(req); 1598 return PTR_ERR(req);
1416 1599
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1500 struct inode *inode = entry->d_inode; 1683 struct inode *inode = entry->d_inode;
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1684 struct fuse_conn *fc = get_fuse_conn(inode);
1502 1685
1503 if (!fuse_allow_task(fc, current)) 1686 if (!fuse_allow_current_process(fc))
1504 return -EACCES; 1687 return -EACCES;
1505 1688
1506 return fuse_update_attributes(inode, stat, NULL, NULL); 1689 return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1518 if (fc->no_setxattr) 1701 if (fc->no_setxattr)
1519 return -EOPNOTSUPP; 1702 return -EOPNOTSUPP;
1520 1703
1521 req = fuse_get_req(fc); 1704 req = fuse_get_req_nopages(fc);
1522 if (IS_ERR(req)) 1705 if (IS_ERR(req))
1523 return PTR_ERR(req); 1706 return PTR_ERR(req);
1524 1707
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1557 if (fc->no_getxattr) 1740 if (fc->no_getxattr)
1558 return -EOPNOTSUPP; 1741 return -EOPNOTSUPP;
1559 1742
1560 req = fuse_get_req(fc); 1743 req = fuse_get_req_nopages(fc);
1561 if (IS_ERR(req)) 1744 if (IS_ERR(req))
1562 return PTR_ERR(req); 1745 return PTR_ERR(req);
1563 1746
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1603 struct fuse_getxattr_out outarg; 1786 struct fuse_getxattr_out outarg;
1604 ssize_t ret; 1787 ssize_t ret;
1605 1788
1606 if (!fuse_allow_task(fc, current)) 1789 if (!fuse_allow_current_process(fc))
1607 return -EACCES; 1790 return -EACCES;
1608 1791
1609 if (fc->no_listxattr) 1792 if (fc->no_listxattr)
1610 return -EOPNOTSUPP; 1793 return -EOPNOTSUPP;
1611 1794
1612 req = fuse_get_req(fc); 1795 req = fuse_get_req_nopages(fc);
1613 if (IS_ERR(req)) 1796 if (IS_ERR(req))
1614 return PTR_ERR(req); 1797 return PTR_ERR(req);
1615 1798
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1654 if (fc->no_removexattr) 1837 if (fc->no_removexattr)
1655 return -EOPNOTSUPP; 1838 return -EOPNOTSUPP;
1656 1839
1657 req = fuse_get_req(fc); 1840 req = fuse_get_req_nopages(fc);
1658 if (IS_ERR(req)) 1841 if (IS_ERR(req))
1659 return PTR_ERR(req); 1842 return PTR_ERR(req);
1660 1843
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e21d4d8f87e3..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
25 struct fuse_req *req; 25 struct fuse_req *req;
26 int err; 26 int err;
27 27
28 req = fuse_get_req(fc); 28 req = fuse_get_req_nopages(fc);
29 if (IS_ERR(req)) 29 if (IS_ERR(req))
30 return PTR_ERR(req); 30 return PTR_ERR(req);
31 31
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57 return NULL; 57 return NULL;
58 58
59 ff->fc = fc; 59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc(); 60 ff->reserved_req = fuse_request_alloc(0);
61 if (unlikely(!ff->reserved_req)) { 61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff); 62 kfree(ff);
63 return NULL; 63 return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
368 if (fc->no_flush) 368 if (fc->no_flush)
369 return 0; 369 return 0;
370 370
371 req = fuse_get_req_nofail(fc, file); 371 req = fuse_get_req_nofail_nopages(fc, file);
372 memset(&inarg, 0, sizeof(inarg)); 372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh; 373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id); 374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
436 436
437 fuse_sync_writes(inode); 437 fuse_sync_writes(inode);
438 438
439 req = fuse_get_req(fc); 439 req = fuse_get_req_nopages(fc);
440 if (IS_ERR(req)) { 440 if (IS_ERR(req)) {
441 err = PTR_ERR(req); 441 err = PTR_ERR(req);
442 goto out; 442 goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
544 */ 544 */
545 fuse_wait_on_page_writeback(inode, page->index); 545 fuse_wait_on_page_writeback(inode, page->index);
546 546
547 req = fuse_get_req(fc); 547 req = fuse_get_req(fc, 1);
548 err = PTR_ERR(req); 548 err = PTR_ERR(req);
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 goto out; 550 goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
555 req->out.argpages = 1; 555 req->out.argpages = 1;
556 req->num_pages = 1; 556 req->num_pages = 1;
557 req->pages[0] = page; 557 req->pages[0] = page;
558 req->page_descs[0].length = count;
558 num_read = fuse_send_read(req, file, pos, count, NULL); 559 num_read = fuse_send_read(req, file, pos, count, NULL);
559 err = req->out.h.error; 560 err = req->out.h.error;
560 fuse_put_request(fc, req); 561 fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
641 struct fuse_req *req; 642 struct fuse_req *req;
642 struct file *file; 643 struct file *file;
643 struct inode *inode; 644 struct inode *inode;
645 unsigned nr_pages;
644}; 646};
645 647
646static int fuse_readpages_fill(void *_data, struct page *page) 648static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
656 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 658 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
657 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 659 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
658 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 660 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
661 int nr_alloc = min_t(unsigned, data->nr_pages,
662 FUSE_MAX_PAGES_PER_REQ);
659 fuse_send_readpages(req, data->file); 663 fuse_send_readpages(req, data->file);
660 data->req = req = fuse_get_req(fc); 664 data->req = req = fuse_get_req(fc, nr_alloc);
661 if (IS_ERR(req)) { 665 if (IS_ERR(req)) {
662 unlock_page(page); 666 unlock_page(page);
663 return PTR_ERR(req); 667 return PTR_ERR(req);
664 } 668 }
665 } 669 }
670
671 if (WARN_ON(req->num_pages >= req->max_pages)) {
672 fuse_put_request(fc, req);
673 return -EIO;
674 }
675
666 page_cache_get(page); 676 page_cache_get(page);
667 req->pages[req->num_pages] = page; 677 req->pages[req->num_pages] = page;
678 req->page_descs[req->num_pages].length = PAGE_SIZE;
668 req->num_pages++; 679 req->num_pages++;
680 data->nr_pages--;
669 return 0; 681 return 0;
670} 682}
671 683
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
676 struct fuse_conn *fc = get_fuse_conn(inode); 688 struct fuse_conn *fc = get_fuse_conn(inode);
677 struct fuse_fill_data data; 689 struct fuse_fill_data data;
678 int err; 690 int err;
691 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
679 692
680 err = -EIO; 693 err = -EIO;
681 if (is_bad_inode(inode)) 694 if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
683 696
684 data.file = file; 697 data.file = file;
685 data.inode = inode; 698 data.inode = inode;
686 data.req = fuse_get_req(fc); 699 data.req = fuse_get_req(fc, nr_alloc);
700 data.nr_pages = nr_pages;
687 err = PTR_ERR(data.req); 701 err = PTR_ERR(data.req);
688 if (IS_ERR(data.req)) 702 if (IS_ERR(data.req))
689 goto out; 703 goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
786 800
787 res = fuse_send_write(req, file, pos, count, NULL); 801 res = fuse_send_write(req, file, pos, count, NULL);
788 802
789 offset = req->page_offset; 803 offset = req->page_descs[0].offset;
790 count = res; 804 count = res;
791 for (i = 0; i < req->num_pages; i++) { 805 for (i = 0; i < req->num_pages; i++) {
792 struct page *page = req->pages[i]; 806 struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
817 int err; 831 int err;
818 832
819 req->in.argpages = 1; 833 req->in.argpages = 1;
820 req->page_offset = offset; 834 req->page_descs[0].offset = offset;
821 835
822 do { 836 do {
823 size_t tmp; 837 size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
857 871
858 err = 0; 872 err = 0;
859 req->pages[req->num_pages] = page; 873 req->pages[req->num_pages] = page;
874 req->page_descs[req->num_pages].length = tmp;
860 req->num_pages++; 875 req->num_pages++;
861 876
862 iov_iter_advance(ii, tmp); 877 iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
869 if (!fc->big_writes) 884 if (!fc->big_writes)
870 break; 885 break;
871 } while (iov_iter_count(ii) && count < fc->max_write && 886 } while (iov_iter_count(ii) && count < fc->max_write &&
872 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); 887 req->num_pages < req->max_pages && offset == 0);
873 888
874 return count > 0 ? count : err; 889 return count > 0 ? count : err;
875} 890}
876 891
892static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
893{
894 return min_t(unsigned,
895 ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
896 (pos >> PAGE_CACHE_SHIFT) + 1,
897 FUSE_MAX_PAGES_PER_REQ);
898}
899
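fuse_wr_pages() converts the byte range of a buffered write into the number of page-cache pages it touches, capped at FUSE_MAX_PAGES_PER_REQ, so fuse_perform_write() can size each request exactly. A worked example of the expression with 4 KiB pages:

/* pos = 5000, len = 10000, PAGE_CACHE_SHIFT = 12:
 *   last page index  = (5000 + 10000 - 1) >> 12 = 14999 >> 12 = 3
 *   first page index =  5000 >> 12              = 1
 *   pages touched    =  3 - 1 + 1               = 3   (pages 1..3)
 */
nr_pages = fuse_wr_pages(5000, 10000);		/* -> 3, below the cap */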
877static ssize_t fuse_perform_write(struct file *file, 900static ssize_t fuse_perform_write(struct file *file,
878 struct address_space *mapping, 901 struct address_space *mapping,
879 struct iov_iter *ii, loff_t pos) 902 struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
889 do { 912 do {
890 struct fuse_req *req; 913 struct fuse_req *req;
891 ssize_t count; 914 ssize_t count;
915 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
892 916
893 req = fuse_get_req(fc); 917 req = fuse_get_req(fc, nr_pages);
894 if (IS_ERR(req)) { 918 if (IS_ERR(req)) {
895 err = PTR_ERR(req); 919 err = PTR_ERR(req);
896 break; 920 break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
1023 } 1047 }
1024} 1048}
1025 1049
1026static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 1050static inline void fuse_page_descs_length_init(struct fuse_req *req,
1051 unsigned index, unsigned nr_pages)
1052{
1053 int i;
1054
1055 for (i = index; i < index + nr_pages; i++)
1056 req->page_descs[i].length = PAGE_SIZE -
1057 req->page_descs[i].offset;
1058}
1059
1060static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1061{
1062 return (unsigned long)ii->iov->iov_base + ii->iov_offset;
1063}
1064
1065static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1066 size_t max_size)
1067{
1068 return min(iov_iter_single_seg_count(ii), max_size);
1069}
1070
1071static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1027 size_t *nbytesp, int write) 1072 size_t *nbytesp, int write)
1028{ 1073{
1029 size_t nbytes = *nbytesp; 1074 size_t nbytes = 0; /* # bytes already packed in req */
1030 unsigned long user_addr = (unsigned long) buf;
1031 unsigned offset = user_addr & ~PAGE_MASK;
1032 int npages;
1033 1075
1034 /* Special case for kernel I/O: can copy directly into the buffer */ 1076 /* Special case for kernel I/O: can copy directly into the buffer */
1035 if (segment_eq(get_fs(), KERNEL_DS)) { 1077 if (segment_eq(get_fs(), KERNEL_DS)) {
1078 unsigned long user_addr = fuse_get_user_addr(ii);
1079 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1080
1036 if (write) 1081 if (write)
1037 req->in.args[1].value = (void *) user_addr; 1082 req->in.args[1].value = (void *) user_addr;
1038 else 1083 else
1039 req->out.args[0].value = (void *) user_addr; 1084 req->out.args[0].value = (void *) user_addr;
1040 1085
1086 iov_iter_advance(ii, frag_size);
1087 *nbytesp = frag_size;
1041 return 0; 1088 return 0;
1042 } 1089 }
1043 1090
1044 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 1091 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1045 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1092 unsigned npages;
1046 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 1093 unsigned long user_addr = fuse_get_user_addr(ii);
1047 npages = get_user_pages_fast(user_addr, npages, !write, req->pages); 1094 unsigned offset = user_addr & ~PAGE_MASK;
1048 if (npages < 0) 1095 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1049 return npages; 1096 int ret;
1097
1098 unsigned n = req->max_pages - req->num_pages;
1099 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
1100
1101 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1102 npages = clamp(npages, 1U, n);
1103
1104 ret = get_user_pages_fast(user_addr, npages, !write,
1105 &req->pages[req->num_pages]);
1106 if (ret < 0)
1107 return ret;
1050 1108
1051 req->num_pages = npages; 1109 npages = ret;
1052 req->page_offset = offset; 1110 frag_size = min_t(size_t, frag_size,
1111 (npages << PAGE_SHIFT) - offset);
1112 iov_iter_advance(ii, frag_size);
1113
1114 req->page_descs[req->num_pages].offset = offset;
1115 fuse_page_descs_length_init(req, req->num_pages, npages);
1116
1117 req->num_pages += npages;
1118 req->page_descs[req->num_pages - 1].length -=
1119 (npages << PAGE_SHIFT) - offset - frag_size;
1120
1121 nbytes += frag_size;
1122 }
1053 1123
1054 if (write) 1124 if (write)
1055 req->in.argpages = 1; 1125 req->in.argpages = 1;
1056 else 1126 else
1057 req->out.argpages = 1; 1127 req->out.argpages = 1;
1058 1128
1059 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 1129 *nbytesp = nbytes;
1060 *nbytesp = min(*nbytesp, nbytes);
1061 1130
1062 return 0; 1131 return 0;
1063} 1132}
1064 1133
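fuse_get_user_pages() now consumes an iov_iter: for each fragment it pins at most the remaining page slots with get_user_pages_fast(), records an offset for the first new page, gives every new page a full-to-end-of-page length via fuse_page_descs_length_init(), and finally trims the last descriptor so the byte total matches the fragment. A worked sketch for one fragment that starts 100 bytes into a page and is 5000 bytes long (4 KiB pages); first is shorthand here for req->num_pages before the fragment was added:

/* offset = 100, frag_size = 5000:
 *   npages = (5000 + 100 + 4095) >> 12 = 2
 *   after init:  page 0 length = 4096 - 100 = 3996, page 1 length = 4096
 *   trim       = (2 << 12) - 100 - 5000 = 3092
 *   final      : page 0 = 3996, page 1 = 4096 - 3092 = 1004, sum = 5000
 */
req->page_descs[first].offset = offset;
fuse_page_descs_length_init(req, first, npages);
req->page_descs[first + npages - 1].length -=
		(npages << PAGE_SHIFT) - offset - frag_size;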
1065ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1134static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1066 size_t count, loff_t *ppos, int write) 1135{
1136 struct iov_iter ii = *ii_p;
1137 int npages = 0;
1138
1139 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1140 unsigned long user_addr = fuse_get_user_addr(&ii);
1141 unsigned offset = user_addr & ~PAGE_MASK;
1142 size_t frag_size = iov_iter_single_seg_count(&ii);
1143
1144 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1145 iov_iter_advance(&ii, frag_size);
1146 }
1147
1148 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1149}
1150
1151ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
1152 unsigned long nr_segs, size_t count, loff_t *ppos,
1153 int write)
1067{ 1154{
1068 struct fuse_file *ff = file->private_data; 1155 struct fuse_file *ff = file->private_data;
1069 struct fuse_conn *fc = ff->fc; 1156 struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1071 loff_t pos = *ppos; 1158 loff_t pos = *ppos;
1072 ssize_t res = 0; 1159 ssize_t res = 0;
1073 struct fuse_req *req; 1160 struct fuse_req *req;
1161 struct iov_iter ii;
1162
1163 iov_iter_init(&ii, iov, nr_segs, count, 0);
1074 1164
1075 req = fuse_get_req(fc); 1165 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1076 if (IS_ERR(req)) 1166 if (IS_ERR(req))
1077 return PTR_ERR(req); 1167 return PTR_ERR(req);
1078 1168
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1080 size_t nres; 1170 size_t nres;
1081 fl_owner_t owner = current->files; 1171 fl_owner_t owner = current->files;
1082 size_t nbytes = min(count, nmax); 1172 size_t nbytes = min(count, nmax);
1083 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1173 int err = fuse_get_user_pages(req, &ii, &nbytes, write);
1084 if (err) { 1174 if (err) {
1085 res = err; 1175 res = err;
1086 break; 1176 break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1103 count -= nres; 1193 count -= nres;
1104 res += nres; 1194 res += nres;
1105 pos += nres; 1195 pos += nres;
1106 buf += nres;
1107 if (nres != nbytes) 1196 if (nres != nbytes)
1108 break; 1197 break;
1109 if (count) { 1198 if (count) {
1110 fuse_put_request(fc, req); 1199 fuse_put_request(fc, req);
1111 req = fuse_get_req(fc); 1200 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1112 if (IS_ERR(req)) 1201 if (IS_ERR(req))
1113 break; 1202 break;
1114 } 1203 }
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1122} 1211}
1123EXPORT_SYMBOL_GPL(fuse_direct_io); 1212EXPORT_SYMBOL_GPL(fuse_direct_io);
1124 1213
1125static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1214static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
1126 size_t count, loff_t *ppos) 1215 unsigned long nr_segs, loff_t *ppos)
1127{ 1216{
1128 ssize_t res; 1217 ssize_t res;
1129 struct inode *inode = file->f_path.dentry->d_inode; 1218 struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1131 if (is_bad_inode(inode)) 1220 if (is_bad_inode(inode))
1132 return -EIO; 1221 return -EIO;
1133 1222
1134 res = fuse_direct_io(file, buf, count, ppos, 0); 1223 res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
1224 ppos, 0);
1135 1225
1136 fuse_invalidate_attr(inode); 1226 fuse_invalidate_attr(inode);
1137 1227
1138 return res; 1228 return res;
1139} 1229}
1140 1230
1141static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, 1231static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1142 size_t count, loff_t *ppos) 1232 size_t count, loff_t *ppos)
1233{
1234 struct iovec iov = { .iov_base = buf, .iov_len = count };
1235 return __fuse_direct_read(file, &iov, 1, ppos);
1236}
1237
1238static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
1239 unsigned long nr_segs, loff_t *ppos)
1143{ 1240{
1144 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file->f_path.dentry->d_inode;
1242 size_t count = iov_length(iov, nr_segs);
1145 ssize_t res; 1243 ssize_t res;
1146 1244
1147 res = generic_write_checks(file, ppos, &count, 0); 1245 res = generic_write_checks(file, ppos, &count, 0);
1148 if (!res) { 1246 if (!res) {
1149 res = fuse_direct_io(file, buf, count, ppos, 1); 1247 res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
1150 if (res > 0) 1248 if (res > 0)
1151 fuse_write_update_size(inode, *ppos); 1249 fuse_write_update_size(inode, *ppos);
1152 } 1250 }
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1159static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1257static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1160 size_t count, loff_t *ppos) 1258 size_t count, loff_t *ppos)
1161{ 1259{
1260 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
1162 struct inode *inode = file->f_path.dentry->d_inode; 1261 struct inode *inode = file->f_path.dentry->d_inode;
1163 ssize_t res; 1262 ssize_t res;
1164 1263
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1167 1266
1168 /* Don't allow parallel writes to the same file */ 1267 /* Don't allow parallel writes to the same file */
1169 mutex_lock(&inode->i_mutex); 1268 mutex_lock(&inode->i_mutex);
1170 res = __fuse_direct_write(file, buf, count, ppos); 1269 res = __fuse_direct_write(file, &iov, 1, ppos);
1171 mutex_unlock(&inode->i_mutex); 1270 mutex_unlock(&inode->i_mutex);
1172 1271
1173 return res; 1272 return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
1272 1371
1273 set_page_writeback(page); 1372 set_page_writeback(page);
1274 1373
1275 req = fuse_request_alloc_nofs(); 1374 req = fuse_request_alloc_nofs(1);
1276 if (!req) 1375 if (!req)
1277 goto err; 1376 goto err;
1278 1377
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
1293 req->in.argpages = 1; 1392 req->in.argpages = 1;
1294 req->num_pages = 1; 1393 req->num_pages = 1;
1295 req->pages[0] = tmp_page; 1394 req->pages[0] = tmp_page;
1296 req->page_offset = 0; 1395 req->page_descs[0].offset = 0;
1396 req->page_descs[0].length = PAGE_SIZE;
1297 req->end = fuse_writepage_end; 1397 req->end = fuse_writepage_end;
1298 req->inode = inode; 1398 req->inode = inode;
1299 1399
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1471 struct fuse_lk_out outarg; 1571 struct fuse_lk_out outarg;
1472 int err; 1572 int err;
1473 1573
1474 req = fuse_get_req(fc); 1574 req = fuse_get_req_nopages(fc);
1475 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1476 return PTR_ERR(req); 1576 return PTR_ERR(req);
1477 1577
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1506 if (fl->fl_flags & FL_CLOSE) 1606 if (fl->fl_flags & FL_CLOSE)
1507 return 0; 1607 return 0;
1508 1608
1509 req = fuse_get_req(fc); 1609 req = fuse_get_req_nopages(fc);
1510 if (IS_ERR(req)) 1610 if (IS_ERR(req))
1511 return PTR_ERR(req); 1611 return PTR_ERR(req);
1512 1612
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1575 if (!inode->i_sb->s_bdev || fc->no_bmap) 1675 if (!inode->i_sb->s_bdev || fc->no_bmap)
1576 return 0; 1676 return 0;
1577 1677
1578 req = fuse_get_req(fc); 1678 req = fuse_get_req_nopages(fc);
1579 if (IS_ERR(req)) 1679 if (IS_ERR(req))
1580 return 0; 1680 return 0;
1581 1681
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1873 num_pages++; 1973 num_pages++;
1874 } 1974 }
1875 1975
1876 req = fuse_get_req(fc); 1976 req = fuse_get_req(fc, num_pages);
1877 if (IS_ERR(req)) { 1977 if (IS_ERR(req)) {
1878 err = PTR_ERR(req); 1978 err = PTR_ERR(req);
1879 req = NULL; 1979 req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1881 } 1981 }
1882 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 1982 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1883 req->num_pages = num_pages; 1983 req->num_pages = num_pages;
1984 fuse_page_descs_length_init(req, 0, req->num_pages);
1884 1985
1885 /* okay, let's send it to the client */ 1986 /* okay, let's send it to the client */
1886 req->in.h.opcode = FUSE_IOCTL; 1987 req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
1981 struct inode *inode = file->f_dentry->d_inode; 2082 struct inode *inode = file->f_dentry->d_inode;
1982 struct fuse_conn *fc = get_fuse_conn(inode); 2083 struct fuse_conn *fc = get_fuse_conn(inode);
1983 2084
1984 if (!fuse_allow_task(fc, current)) 2085 if (!fuse_allow_current_process(fc))
1985 return -EACCES; 2086 return -EACCES;
1986 2087
1987 if (is_bad_inode(inode)) 2088 if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2066 return DEFAULT_POLLMASK; 2167 return DEFAULT_POLLMASK;
2067 2168
2068 poll_wait(file, &ff->poll_wait, wait); 2169 poll_wait(file, &ff->poll_wait, wait);
2170 inarg.events = (__u32)poll_requested_events(wait);
2069 2171
2070 /* 2172 /*
2071 * Ask for notification iff there's someone waiting for it. 2173 * Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2076 fuse_register_polled_file(fc, ff); 2178 fuse_register_polled_file(fc, ff);
2077 } 2179 }
2078 2180
2079 req = fuse_get_req(fc); 2181 req = fuse_get_req_nopages(fc);
2080 if (IS_ERR(req)) 2182 if (IS_ERR(req))
2081 return POLLERR; 2183 return POLLERR;
2082 2184
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2126 return 0; 2228 return 0;
2127} 2229}
2128 2230
2129static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2130 unsigned long nr_segs, loff_t *ppos, int rw)
2131{
2132 const struct iovec *vector = iov;
2133 ssize_t ret = 0;
2134
2135 while (nr_segs > 0) {
2136 void __user *base;
2137 size_t len;
2138 ssize_t nr;
2139
2140 base = vector->iov_base;
2141 len = vector->iov_len;
2142 vector++;
2143 nr_segs--;
2144
2145 if (rw == WRITE)
2146 nr = __fuse_direct_write(filp, base, len, ppos);
2147 else
2148 nr = fuse_direct_read(filp, base, len, ppos);
2149
2150 if (nr < 0) {
2151 if (!ret)
2152 ret = nr;
2153 break;
2154 }
2155 ret += nr;
2156 if (nr != len)
2157 break;
2158 }
2159
2160 return ret;
2161}
2162
2163
2164static ssize_t 2231static ssize_t
2165fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2232fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2166 loff_t offset, unsigned long nr_segs) 2233 loff_t offset, unsigned long nr_segs)
@@ -2172,13 +2239,16 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2172 file = iocb->ki_filp; 2239 file = iocb->ki_filp;
2173 pos = offset; 2240 pos = offset;
2174 2241
2175 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); 2242 if (rw == WRITE)
2243 ret = __fuse_direct_write(file, iov, nr_segs, &pos);
2244 else
2245 ret = __fuse_direct_read(file, iov, nr_segs, &pos);
2176 2246
2177 return ret; 2247 return ret;
2178} 2248}
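With fuse_direct_io() accepting a full iovec, fuse_direct_IO() no longer loops segment by segment (fuse_loop_dio() is deleted); the whole vector reaches the request-building code, which can pack several segments into one FUSE request. The plain read/write file operations keep working by wrapping their buffer in a one-element iovec, as the wrappers earlier in this file now do:

/* Single-buffer entry point sharing the vectored path (see
 * fuse_direct_read()/fuse_direct_write() above). */
struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };

return __fuse_direct_read(file, &iov, 1, ppos);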
2179 2249
2180long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 2250static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2181 loff_t length) 2251 loff_t length)
2182{ 2252{
2183 struct fuse_file *ff = file->private_data; 2253 struct fuse_file *ff = file->private_data;
2184 struct fuse_conn *fc = ff->fc; 2254 struct fuse_conn *fc = ff->fc;
@@ -2194,7 +2264,7 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2194 if (fc->no_fallocate) 2264 if (fc->no_fallocate)
2195 return -EOPNOTSUPP; 2265 return -EOPNOTSUPP;
2196 2266
2197 req = fuse_get_req(fc); 2267 req = fuse_get_req_nopages(fc);
2198 if (IS_ERR(req)) 2268 if (IS_ERR(req))
2199 return PTR_ERR(req); 2269 return PTR_ERR(req);
2200 2270
@@ -2213,7 +2283,6 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2213 2283
2214 return err; 2284 return err;
2215} 2285}
2216EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2217 2286
2218static const struct file_operations fuse_file_operations = { 2287static const struct file_operations fuse_file_operations = {
2219 .llseek = fuse_file_llseek, 2288 .llseek = fuse_file_llseek,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
44 doing the mount will be allowed to access the filesystem */ 44 doing the mount will be allowed to access the filesystem */
45#define FUSE_ALLOW_OTHER (1 << 1) 45#define FUSE_ALLOW_OTHER (1 << 1)
46 46
47/** Number of page pointers embedded in fuse_req */
48#define FUSE_REQ_INLINE_PAGES 1
49
47/** List of active connections */ 50/** List of active connections */
48extern struct list_head fuse_conn_list; 51extern struct list_head fuse_conn_list;
49 52
@@ -103,6 +106,15 @@ struct fuse_inode {
103 106
104	/** List of writepage requests (pending or sent) */ 107
105 struct list_head writepages; 108 struct list_head writepages;
109
110 /** Miscellaneous bits describing inode state */
111 unsigned long state;
112};
113
114/** FUSE inode state bits */
115enum {
116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS,
106}; 118};
107 119
108struct fuse_conn; 120struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
200 struct fuse_arg args[3]; 212 struct fuse_arg args[3];
201}; 213};
202 214
215/** FUSE page descriptor */
216struct fuse_page_desc {
217 unsigned int length;
218 unsigned int offset;
219};
220
203/** The request state */ 221/** The request state */
204enum fuse_req_state { 222enum fuse_req_state {
205 FUSE_REQ_INIT = 0, 223 FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
291 } misc; 309 } misc;
292 310
293 /** page vector */ 311 /** page vector */
294 struct page *pages[FUSE_MAX_PAGES_PER_REQ]; 312 struct page **pages;
313
314 /** page-descriptor vector */
315 struct fuse_page_desc *page_descs;
316
317 /** size of the 'pages' array */
318 unsigned max_pages;
319
320 /** inline page vector */
321 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
322
323 /** inline page-descriptor vector */
324 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
295 325
296 /** number of pages in vector */ 326 /** number of pages in vector */
297 unsigned num_pages; 327 unsigned num_pages;
298 328
299 /** offset of data on first page */
300 unsigned page_offset;
301
302 /** File used in the request (or NULL) */ 329 /** File used in the request (or NULL) */
303 struct fuse_file *ff; 330 struct fuse_file *ff;
304 331
@@ -487,6 +514,12 @@ struct fuse_conn {
487 /** Use enhanced/automatic page cache invalidation. */ 514 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1; 515 unsigned auto_inval_data:1;
489 516
517 /** Does the filesystem support readdirplus? */
518 unsigned do_readdirplus:1;
519
520 /** Does the filesystem want adaptive readdirplus? */
521 unsigned readdirplus_auto:1;
522
490 /** The number of requests waiting for completion */ 523 /** The number of requests waiting for completion */
491 atomic_t num_waiting; 524 atomic_t num_waiting;
492 525
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578 611
579struct fuse_forget_link *fuse_alloc_forget(void); 612struct fuse_forget_link *fuse_alloc_forget(void);
580 613
614/* Used by READDIRPLUS */
615void fuse_force_forget(struct file *file, u64 nodeid);
616
581/** 617/**
582 * Initialize READ or READDIR request 618 * Initialize READ or READDIR request
583 */ 619 */
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
658/** 694/**
659 * Allocate a request 695 * Allocate a request
660 */ 696 */
661struct fuse_req *fuse_request_alloc(void); 697struct fuse_req *fuse_request_alloc(unsigned npages);
662 698
663struct fuse_req *fuse_request_alloc_nofs(void); 699struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
664 700
665/** 701/**
666 * Free a request 702 * Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
668void fuse_request_free(struct fuse_req *req); 704void fuse_request_free(struct fuse_req *req);
669 705
670/** 706/**
671 * Get a request, may fail with -ENOMEM 707 * Get a request, may fail with -ENOMEM,
708 * caller should specify # elements in req->pages[] explicitly
672 */ 709 */
673struct fuse_req *fuse_get_req(struct fuse_conn *fc); 710struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
711
712/**
713 * Get a request, may fail with -ENOMEM,
 714 * useful for callers that don't use req->pages[]
715 */
716static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
717{
718 return fuse_get_req(fc, 0);
719}
674 720
675/** 721/**
676 * Gets a request for a file operation, always succeeds 722 * Gets a request for a file operation, always succeeds
677 */ 723 */
678struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); 724struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
725 struct file *file);
679 726
680/** 727/**
681 * Decrement reference count of a request. If count goes to zero free 728 * Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
739int fuse_valid_type(int m); 786int fuse_valid_type(int m);
740 787
741/** 788/**
742 * Is task allowed to perform filesystem operation? 789 * Is current process allowed to perform filesystem operation?
743 */ 790 */
744int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); 791int fuse_allow_current_process(struct fuse_conn *fc);
745 792
746u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); 793u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
747 794
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
776 823
777int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 824int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
778 bool isdir); 825 bool isdir);
779ssize_t fuse_direct_io(struct file *file, const char __user *buf, 826ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
780 size_t count, loff_t *ppos, int write); 827 unsigned long nr_segs, size_t count, loff_t *ppos,
828 int write);
781long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 829long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
782 unsigned int flags); 830 unsigned int flags);
783long fuse_ioctl_common(struct file *file, unsigned int cmd, 831long fuse_ioctl_common(struct file *file, unsigned int cmd,
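
Request allocation now takes the page count up front, and struct fuse_req carries a one-entry inline page vector (FUSE_REQ_INLINE_PAGES) plus per-page fuse_page_desc entries in place of the fixed FUSE_MAX_PAGES_PER_REQ array. A minimal usage sketch under these prototypes; fuse_put_request() as the release helper is an assumption, since only its "decrement reference count" doc comment appears in this hunk:

/* Sketch only: allocate a request sized for npages, or a pageless one. */
static int example_send(struct fuse_conn *fc, unsigned npages)
{
        struct fuse_req *req;

        req = npages ? fuse_get_req(fc, npages) : fuse_get_req_nopages(fc);
        if (IS_ERR(req))
                return PTR_ERR(req);

        /* ... fill req->pages[0..npages-1] and req->page_descs[] ... */

        fuse_put_request(fc, req);      /* assumed release counterpart */
        return 0;
}
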
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..df00993ed108 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0; 94 fi->orig_ino = 0;
95 fi->state = 0;
95 INIT_LIST_HEAD(&fi->write_files); 96 INIT_LIST_HEAD(&fi->write_files);
96 INIT_LIST_HEAD(&fi->queued_writes); 97 INIT_LIST_HEAD(&fi->queued_writes);
97 INIT_LIST_HEAD(&fi->writepages); 98 INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408 struct fuse_statfs_out outarg; 409 struct fuse_statfs_out outarg;
409 int err; 410 int err;
410 411
411 if (!fuse_allow_task(fc, current)) { 412 if (!fuse_allow_current_process(fc)) {
412 buf->f_type = FUSE_SUPER_MAGIC; 413 buf->f_type = FUSE_SUPER_MAGIC;
413 return 0; 414 return 0;
414 } 415 }
415 416
416 req = fuse_get_req(fc); 417 req = fuse_get_req_nopages(fc);
417 if (IS_ERR(req)) 418 if (IS_ERR(req))
418 return PTR_ERR(req); 419 return PTR_ERR(req);
419 420
@@ -678,7 +679,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
678 679
679 if (*max_len < len) { 680 if (*max_len < len) {
680 *max_len = len; 681 *max_len = len;
681 return 255; 682 return FILEID_INVALID;
682 } 683 }
683 684
684 nodeid = get_fuse_inode(inode)->nodeid; 685 nodeid = get_fuse_inode(inode)->nodeid;
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863 fc->dont_mask = 1; 864 fc->dont_mask = 1;
864 if (arg->flags & FUSE_AUTO_INVAL_DATA) 865 if (arg->flags & FUSE_AUTO_INVAL_DATA)
865 fc->auto_inval_data = 1; 866 fc->auto_inval_data = 1;
867 if (arg->flags & FUSE_DO_READDIRPLUS)
868 fc->do_readdirplus = 1;
869 if (arg->flags & FUSE_READDIRPLUS_AUTO)
870 fc->readdirplus_auto = 1;
866 } else { 871 } else {
867 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 872 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868 fc->no_lock = 1; 873 fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 894 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 895 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 896 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; 897 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
898 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
893 req->in.h.opcode = FUSE_INIT; 899 req->in.h.opcode = FUSE_INIT;
894 req->in.numargs = 1; 900 req->in.numargs = 1;
895 req->in.args[0].size = sizeof(*arg); 901 req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1034 /* only now - we want root dentry with NULL ->d_op */ 1040 /* only now - we want root dentry with NULL ->d_op */
1035 sb->s_d_op = &fuse_dentry_operations; 1041 sb->s_d_op = &fuse_dentry_operations;
1036 1042
1037 init_req = fuse_request_alloc(); 1043 init_req = fuse_request_alloc(0);
1038 if (!init_req) 1044 if (!init_req)
1039 goto err_put_root; 1045 goto err_put_root;
1040 1046
1041 if (is_bdev) { 1047 if (is_bdev) {
1042 fc->destroy_req = fuse_request_alloc(); 1048 fc->destroy_req = fuse_request_alloc(0);
1043 if (!fc->destroy_req) 1049 if (!fc->destroy_req)
1044 goto err_free_init_req; 1050 goto err_free_init_req;
1045 } 1051 }
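
process_init_reply() now latches two capability bits, do_readdirplus and readdirplus_auto, and fuse_send_init() advertises the matching FUSE_DO_READDIRPLUS and FUSE_READDIRPLUS_AUTO flags. A sketch of how a readdir caller could combine them with the FUSE_I_ADVISE_RDPLUS state bit added to struct fuse_inode; the real decision helper lives in fs/fuse/dir.c, outside this excerpt:

/* Sketch only: choose READDIRPLUS based on connection and inode hints. */
static bool use_readdirplus(struct fuse_conn *fc, struct fuse_inode *fi)
{
        if (!fc->do_readdirplus)
                return false;           /* server never offered it */
        if (!fc->readdirplus_auto)
                return true;            /* always-on mode */
        /* adaptive mode: only when a recent lookup advised it */
        return test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
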
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index f850020ad906..f69ac0af5496 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -237,7 +237,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
237 return -EINVAL; 237 return -EINVAL;
238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
239 return value ? -EACCES : 0; 239 return value ? -EACCES : 0;
240 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) 240 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_FOWNER))
241 return -EPERM; 241 return -EPERM;
242 if (S_ISLNK(inode->i_mode)) 242 if (S_ISLNK(inode->i_mode))
243 return -EOPNOTSUPP; 243 return -EOPNOTSUPP;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2a2ea9..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
51 continue; 51 continue;
52 if (gfs2_is_jdata(ip)) 52 if (gfs2_is_jdata(ip))
53 set_buffer_uptodate(bh); 53 set_buffer_uptodate(bh);
54 gfs2_trans_add_bh(ip->i_gl, bh, 0); 54 gfs2_trans_add_data(ip->i_gl, bh);
55 } 55 }
56} 56}
57 57
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -852,7 +850,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
852 goto failed; 850 goto failed;
853 } 851 }
854 852
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 853 gfs2_trans_add_meta(ip->i_gl, dibh);
856 854
857 if (gfs2_is_stuffed(ip)) 855 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 856 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a68e91bcef3d..5e83657f046e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 if (!gfs2_is_jdata(ip)) 94 if (!gfs2_is_jdata(ip))
94 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
95 if (!gfs2_is_writeback(ip)) 96 if (!gfs2_is_writeback(ip))
96 gfs2_trans_add_bh(ip->i_gl, bh, 0); 97 gfs2_trans_add_data(ip->i_gl, bh);
97 98
98 if (release) { 99 if (release) {
99 unlock_page(page); 100 unlock_page(page);
@@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
153 154
154 /* Set up the pointer to the new block */ 155 /* Set up the pointer to the new block */
155 156
156 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 157 gfs2_trans_add_meta(ip->i_gl, dibh);
157 di = (struct gfs2_dinode *)dibh->b_data; 158 di = (struct gfs2_dinode *)dibh->b_data;
158 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 159 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
159 160
@@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
405 BUG_ON(i < 1); 406 BUG_ON(i < 1);
406 BUG_ON(mp->mp_bh[i] != NULL); 407 BUG_ON(mp->mp_bh[i] != NULL);
407 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
408 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
409 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
410 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
411 ptr += offset; 412 ptr += offset;
@@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
468 BUG_ON(sheight < 1); 469 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 470 BUG_ON(dibh == NULL);
470 471
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 472 gfs2_trans_add_meta(ip->i_gl, dibh);
472 473
473 if (height == sheight) { 474 if (height == sheight) {
474 struct buffer_head *bh; 475 struct buffer_head *bh;
@@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
544 /* Branching from existing tree */ 545 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 546 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 547 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 548 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
548 for (; i < height && n > 0; i++, n--) 549 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 550 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 551 mp->mp_list[i-1], bn++);
@@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
556 case ALLOC_DATA: 557 case ALLOC_DATA:
557 BUG_ON(n > dblks); 558 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 559 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 560 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
560 dblks = n; 561 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 562 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 563 dblock = bn;
@@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
796 797
797 down_write(&ip->i_rw_mutex); 798 down_write(&ip->i_rw_mutex);
798 799
799 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 800 gfs2_trans_add_meta(ip->i_gl, dibh);
800 gfs2_trans_add_bh(ip->i_gl, bh, 1); 801 gfs2_trans_add_meta(ip->i_gl, bh);
801 802
802 bstart = 0; 803 bstart = 0;
803 blen = 0; 804 blen = 0;
@@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
981 } 982 }
982 983
983 if (!gfs2_is_writeback(ip)) 984 if (!gfs2_is_writeback(ip))
984 gfs2_trans_add_bh(ip->i_gl, bh, 0); 985 gfs2_trans_add_data(ip->i_gl, bh);
985 986
986 zero_user(page, offset, length); 987 zero_user(page, offset, length);
987 mark_buffer_dirty(bh); 988 mark_buffer_dirty(bh);
@@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1046 if (error) 1047 if (error)
1047 goto out; 1048 goto out;
1048 1049
1049 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_meta(ip->i_gl, dibh);
1050 1051
1051 if (gfs2_is_stuffed(ip)) { 1052 if (gfs2_is_stuffed(ip)) {
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
@@ -1098,7 +1099,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1098 if (error) 1099 if (error)
1099 return error; 1100 return error;
1100 1101
1101 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1102 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1102 if (error) 1103 if (error)
1103 return error; 1104 return error;
1104 1105
@@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1143 1145
1144 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1146 gfs2_trans_add_meta(ip->i_gl, dibh);
1145 gfs2_dinode_out(ip, dibh->b_data); 1147 gfs2_dinode_out(ip, dibh->b_data);
1146 brelse(dibh); 1148 brelse(dibh);
1147 1149
@@ -1246,7 +1248,7 @@ static int do_grow(struct inode *inode, u64 size)
1246 1248
1247 i_size_write(inode, size); 1249 i_size_write(inode, size);
1248 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1250 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1249 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1251 gfs2_trans_add_meta(ip->i_gl, dibh);
1250 gfs2_dinode_out(ip, dibh->b_data); 1252 gfs2_dinode_out(ip, dibh->b_data);
1251 brelse(dibh); 1253 brelse(dibh);
1252 1254
@@ -1286,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 1288
1287 inode_dio_wait(inode); 1289 inode_dio_wait(inode);
1288 1290
1291 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret)
1293 return ret;
1294
1289 oldsize = inode->i_size; 1295 oldsize = inode->i_size;
1290 if (newsize >= oldsize) 1296 if (newsize >= oldsize)
1291 return do_grow(inode, newsize); 1297 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 9a35670fdc38..c3e82bd23179 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -93,7 +93,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
93 struct buffer_head *bh; 93 struct buffer_head *bh;
94 94
95 bh = gfs2_meta_new(ip->i_gl, block); 95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 96 gfs2_trans_add_meta(ip->i_gl, bh);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh; 99 *bhp = bh;
@@ -127,7 +127,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
127 if (error) 127 if (error)
128 return error; 128 return error;
129 129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 130 gfs2_trans_add_meta(ip->i_gl, dibh);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_inode.i_size < offset + size) 132 if (ip->i_inode.i_size < offset + size)
133 i_size_write(&ip->i_inode, offset + size); 133 i_size_write(&ip->i_inode, offset + size);
@@ -209,7 +209,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
209 if (error) 209 if (error)
210 goto fail; 210 goto fail;
211 211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 212 gfs2_trans_add_meta(ip->i_gl, bh);
213 memcpy(bh->b_data + o, buf, amount); 213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh); 214 brelse(bh);
215 215
@@ -231,7 +231,7 @@ out:
231 i_size_write(&ip->i_inode, offset + copied); 231 i_size_write(&ip->i_inode, offset + copied);
232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_meta(ip->i_gl, dibh);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
236 brelse(dibh); 236 brelse(dibh);
237 237
@@ -647,7 +647,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
647 return; 647 return;
648 } 648 }
649 649
650 gfs2_trans_add_bh(dip->i_gl, bh, 1); 650 gfs2_trans_add_meta(dip->i_gl, bh);
651 651
652 /* If there is no prev entry, this is the first entry in the block. 652 /* If there is no prev entry, this is the first entry in the block.
653 The de_rec_len is already as big as it needs to be. Just zero 653 The de_rec_len is already as big as it needs to be. Just zero
@@ -690,7 +690,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691 totlen = be16_to_cpu(dent->de_rec_len); 691 totlen = be16_to_cpu(dent->de_rec_len);
692 BUG_ON(offset + name->len > totlen); 692 BUG_ON(offset + name->len > totlen);
693 gfs2_trans_add_bh(ip->i_gl, bh, 1); 693 gfs2_trans_add_meta(ip->i_gl, bh);
694 ndent = (struct gfs2_dirent *)((char *)dent + offset); 694 ndent = (struct gfs2_dirent *)((char *)dent + offset);
695 dent->de_rec_len = cpu_to_be16(offset); 695 dent->de_rec_len = cpu_to_be16(offset);
696 gfs2_qstr2dirent(name, totlen - offset, ndent); 696 gfs2_qstr2dirent(name, totlen - offset, ndent);
@@ -831,7 +831,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
831 return NULL; 831 return NULL;
832 832
833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834 gfs2_trans_add_bh(ip->i_gl, bh, 1); 834 gfs2_trans_add_meta(ip->i_gl, bh);
835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836 leaf = (struct gfs2_leaf *)bh->b_data; 836 leaf = (struct gfs2_leaf *)bh->b_data;
837 leaf->lf_depth = cpu_to_be16(depth); 837 leaf->lf_depth = cpu_to_be16(depth);
@@ -916,7 +916,7 @@ static int dir_make_exhash(struct inode *inode)
916 /* We're done with the new leaf block, now setup the new 916 /* We're done with the new leaf block, now setup the new
917 hash table. */ 917 hash table. */
918 918
919 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 919 gfs2_trans_add_meta(dip->i_gl, dibh);
920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 921
922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 return 1; /* can't split */ 976 return 1; /* can't split */
977 } 977 }
978 978
979 gfs2_trans_add_bh(dip->i_gl, obh, 1); 979 gfs2_trans_add_meta(dip->i_gl, obh);
980 980
981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982 if (!nleaf) { 982 if (!nleaf) {
@@ -1069,7 +1069,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1069 1069
1070 error = gfs2_meta_inode_buffer(dip, &dibh); 1070 error = gfs2_meta_inode_buffer(dip, &dibh);
1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1072 gfs2_trans_add_meta(dip->i_gl, dibh);
1073 gfs2_add_inode_blocks(&dip->i_inode, 1); 1073 gfs2_add_inode_blocks(&dip->i_inode, 1);
1074 gfs2_dinode_out(dip, dibh->b_data); 1074 gfs2_dinode_out(dip, dibh->b_data);
1075 brelse(dibh); 1075 brelse(dibh);
@@ -1622,7 +1622,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1622 return error; 1622 return error;
1623 } while(1); 1623 } while(1);
1624 1624
1625 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1625 gfs2_trans_add_meta(ip->i_gl, obh);
1626 1626
1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628 if (!leaf) { 1628 if (!leaf) {
@@ -1636,7 +1636,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1636 error = gfs2_meta_inode_buffer(ip, &bh); 1636 error = gfs2_meta_inode_buffer(ip, &bh);
1637 if (error) 1637 if (error)
1638 return error; 1638 return error;
1639 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1639 gfs2_trans_add_meta(ip->i_gl, bh);
1640 gfs2_add_inode_blocks(&ip->i_inode, 1); 1640 gfs2_add_inode_blocks(&ip->i_inode, 1);
1641 gfs2_dinode_out(ip, bh->b_data); 1641 gfs2_dinode_out(ip, bh->b_data);
1642 brelse(bh); 1642 brelse(bh);
@@ -1795,7 +1795,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1795 if (IS_ERR(dent)) 1795 if (IS_ERR(dent))
1796 return PTR_ERR(dent); 1796 return PTR_ERR(dent);
1797 1797
1798 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1798 gfs2_trans_add_meta(dip->i_gl, bh);
1799 gfs2_inum_out(nip, dent); 1799 gfs2_inum_out(nip, dent);
1800 dent->de_type = cpu_to_be16(new_type); 1800 dent->de_type = cpu_to_be16(new_type);
1801 1801
@@ -1804,7 +1804,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1804 error = gfs2_meta_inode_buffer(dip, &bh); 1804 error = gfs2_meta_inode_buffer(dip, &bh);
1805 if (error) 1805 if (error)
1806 return error; 1806 return error;
1807 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1807 gfs2_trans_add_meta(dip->i_gl, bh);
1808 } 1808 }
1809 1809
1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
@@ -1849,7 +1849,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1849 if (!ht) 1849 if (!ht)
1850 return -ENOMEM; 1850 return -ENOMEM;
1851 1851
1852 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1852 error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1853 if (error) 1853 if (error)
1854 goto out; 1854 goto out;
1855 1855
@@ -1917,7 +1917,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1917 if (error) 1917 if (error)
1918 goto out_end_trans; 1918 goto out_end_trans;
1919 1919
1920 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1920 gfs2_trans_add_meta(dip->i_gl, dibh);
1921 /* On the last dealloc, make this a regular file in case we crash. 1921 /* On the last dealloc, make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */ 1922 (We don't want to free these blocks a second time.) */
1923 if (last_dealloc) 1923 if (last_dealloc)
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 4767774a5f3e..9973df4ff565 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -37,10 +37,10 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
37 37
38 if (parent && (*len < GFS2_LARGE_FH_SIZE)) { 38 if (parent && (*len < GFS2_LARGE_FH_SIZE)) {
39 *len = GFS2_LARGE_FH_SIZE; 39 *len = GFS2_LARGE_FH_SIZE;
40 return 255; 40 return FILEID_INVALID;
41 } else if (*len < GFS2_SMALL_FH_SIZE) { 41 } else if (*len < GFS2_SMALL_FH_SIZE) {
42 *len = GFS2_SMALL_FH_SIZE; 42 *len = GFS2_SMALL_FH_SIZE;
43 return 255; 43 return FILEID_INVALID;
44 } 44 }
45 45
46 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 46 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
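
Both the fuse and gfs2 ->encode_fh() hunks replace the literal 255 with FILEID_INVALID (the same 0xff value, spelled symbolically). A minimal sketch of that contract; the handle length and returned fileid type are placeholders:

/* Sketch only: report the required length and FILEID_INVALID when the
 * caller's buffer is too small, otherwise fill the handle. */
static int example_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
                             struct inode *parent)
{
        int len = 3;                    /* placeholder size, 32-bit words */

        if (*max_len < len) {
                *max_len = len;
                return FILEID_INVALID;
        }
        /* ... fill fh[0..len-1] from the inode ... */
        *max_len = len;
        return 1;                       /* placeholder fileid type */
}
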
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..019f45e45097 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -157,7 +157,7 @@ static const u32 gfs2_to_fsflags[32] = {
157 157
158static int gfs2_get_flags(struct file *filp, u32 __user *ptr) 158static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
159{ 159{
160 struct inode *inode = filp->f_path.dentry->d_inode; 160 struct inode *inode = file_inode(filp);
161 struct gfs2_inode *ip = GFS2_I(inode); 161 struct gfs2_inode *ip = GFS2_I(inode);
162 struct gfs2_holder gh; 162 struct gfs2_holder gh;
163 int error; 163 int error;
@@ -217,7 +217,7 @@ void gfs2_set_inode_flags(struct inode *inode)
217 */ 217 */
218static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) 218static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
219{ 219{
220 struct inode *inode = filp->f_path.dentry->d_inode; 220 struct inode *inode = file_inode(filp);
221 struct gfs2_inode *ip = GFS2_I(inode); 221 struct gfs2_inode *ip = GFS2_I(inode);
222 struct gfs2_sbd *sdp = GFS2_SB(inode); 222 struct gfs2_sbd *sdp = GFS2_SB(inode);
223 struct buffer_head *bh; 223 struct buffer_head *bh;
@@ -276,7 +276,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_meta(ip->i_gl, bh);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
@@ -293,7 +293,7 @@ out_drop_write:
293 293
294static int gfs2_set_flags(struct file *filp, u32 __user *ptr) 294static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
295{ 295{
296 struct inode *inode = filp->f_path.dentry->d_inode; 296 struct inode *inode = file_inode(filp);
297 u32 fsflags, gfsflags; 297 u32 fsflags, gfsflags;
298 298
299 if (get_user(fsflags, ptr)) 299 if (get_user(fsflags, ptr))
@@ -336,7 +336,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
336 336
337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) 337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
338{ 338{
339 struct inode *inode = filep->f_dentry->d_inode; 339 struct inode *inode = file_inode(filep);
340 struct gfs2_sbd *sdp = GFS2_SB(inode); 340 struct gfs2_sbd *sdp = GFS2_SB(inode);
341 struct gfs2_inode *ip = GFS2_I(inode); 341 struct gfs2_inode *ip = GFS2_I(inode);
342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; 342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
@@ -386,7 +386,7 @@ static int gfs2_allocate_page_backing(struct page *page)
386static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 386static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
387{ 387{
388 struct page *page = vmf->page; 388 struct page *page = vmf->page;
389 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 389 struct inode *inode = file_inode(vma->vm_file);
390 struct gfs2_inode *ip = GFS2_I(inode); 390 struct gfs2_inode *ip = GFS2_I(inode);
391 struct gfs2_sbd *sdp = GFS2_SB(inode); 391 struct gfs2_sbd *sdp = GFS2_SB(inode);
392 unsigned long last_index; 392 unsigned long last_index;
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
@@ -673,8 +673,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
673{ 673{
674 struct file *file = iocb->ki_filp; 674 struct file *file = iocb->ki_filp;
675 size_t writesize = iov_length(iov, nr_segs); 675 size_t writesize = iov_length(iov, nr_segs);
676 struct dentry *dentry = file->f_dentry; 676 struct gfs2_inode *ip = GFS2_I(file_inode(file));
677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
678 int ret; 677 int ret;
679 678
680 ret = gfs2_rs_alloc(ip); 679 ret = gfs2_rs_alloc(ip);
@@ -709,7 +708,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
709 if (unlikely(error)) 708 if (unlikely(error))
710 return error; 709 return error;
711 710
712 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 711 gfs2_trans_add_meta(ip->i_gl, dibh);
713 712
714 if (gfs2_is_stuffed(ip)) { 713 if (gfs2_is_stuffed(ip)) {
715 error = gfs2_unstuff_dinode(ip, NULL); 714 error = gfs2_unstuff_dinode(ip, NULL);
@@ -772,7 +771,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
772static long gfs2_fallocate(struct file *file, int mode, loff_t offset, 771static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
773 loff_t len) 772 loff_t len)
774{ 773{
775 struct inode *inode = file->f_path.dentry->d_inode; 774 struct inode *inode = file_inode(file);
776 struct gfs2_sbd *sdp = GFS2_SB(inode); 775 struct gfs2_sbd *sdp = GFS2_SB(inode);
777 struct gfs2_inode *ip = GFS2_I(inode); 776 struct gfs2_inode *ip = GFS2_I(inode);
778 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 777 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -938,7 +937,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
938{ 937{
939 struct gfs2_file *fp = file->private_data; 938 struct gfs2_file *fp = file->private_data;
940 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 939 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
941 struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); 940 struct gfs2_inode *ip = GFS2_I(file_inode(file));
942 struct gfs2_glock *gl; 941 struct gfs2_glock *gl;
943 unsigned int state; 942 unsigned int state;
944 int flags; 943 int flags;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0cb504..cf3515546739 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h> 32#include <linux/percpu.h>
33#include <linux/list_sort.h>
33 34
34#include "gfs2.h" 35#include "gfs2.h"
35#include "incore.h" 36#include "incore.h"
@@ -1376,56 +1377,105 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1376 gfs2_glock_put(gl); 1377 gfs2_glock_put(gl);
1377} 1378}
1378 1379
1380static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1381{
1382 struct gfs2_glock *gla, *glb;
1379 1383
1380static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1384 gla = list_entry(a, struct gfs2_glock, gl_lru);
1381 struct shrink_control *sc) 1385 glb = list_entry(b, struct gfs2_glock, gl_lru);
1386
1387 if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1388 return 1;
1389 if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1390 return -1;
1391
1392 return 0;
1393}
1394
1395/**
1396 * gfs2_dispose_glock_lru - Demote a list of glocks
1397 * @list: The list to dispose of
1398 *
1399 * Disposing of glocks may involve disk accesses, so that here we sort
1400 * the glocks by number (i.e. disk location of the inodes) so that if
1401 * there are any such accesses, they'll be sent in order (mostly).
1402 *
1403 * Must be called under the lru_lock, but may drop and retake this
1404 * lock. While the lru_lock is dropped, entries may vanish from the
1405 * list, but no new entries will appear on the list (since it is
1406 * private)
1407 */
1408
1409static void gfs2_dispose_glock_lru(struct list_head *list)
1410__releases(&lru_lock)
1411__acquires(&lru_lock)
1382{ 1412{
1383 struct gfs2_glock *gl; 1413 struct gfs2_glock *gl;
1384 int may_demote;
1385 int nr_skipped = 0;
1386 int nr = sc->nr_to_scan;
1387 gfp_t gfp_mask = sc->gfp_mask;
1388 LIST_HEAD(skipped);
1389 1414
1390 if (nr == 0) 1415 list_sort(NULL, list, glock_cmp);
1391 goto out;
1392 1416
1393 if (!(gfp_mask & __GFP_FS)) 1417 while(!list_empty(list)) {
1394 return -1; 1418 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1419 list_del_init(&gl->gl_lru);
1420 clear_bit(GLF_LRU, &gl->gl_flags);
1421 gfs2_glock_hold(gl);
1422 spin_unlock(&lru_lock);
1423 spin_lock(&gl->gl_spin);
1424 if (demote_ok(gl))
1425 handle_callback(gl, LM_ST_UNLOCKED, 0);
1426 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1427 smp_mb__after_clear_bit();
1428 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1429 gfs2_glock_put_nolock(gl);
1430 spin_unlock(&gl->gl_spin);
1431 spin_lock(&lru_lock);
1432 }
1433}
1434
1435/**
1436 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1437 * @nr: The number of entries to scan
1438 *
1439 * This function selects the entries on the LRU which are able to
1440 * be demoted, and then kicks off the process by calling
1441 * gfs2_dispose_glock_lru() above.
1442 */
1443
1444static void gfs2_scan_glock_lru(int nr)
1445{
1446 struct gfs2_glock *gl;
1447 LIST_HEAD(skipped);
1448 LIST_HEAD(dispose);
1395 1449
1396 spin_lock(&lru_lock); 1450 spin_lock(&lru_lock);
1397 while(nr && !list_empty(&lru_list)) { 1451 while(nr && !list_empty(&lru_list)) {
1398 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1452 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1399 list_del_init(&gl->gl_lru);
1400 clear_bit(GLF_LRU, &gl->gl_flags);
1401 atomic_dec(&lru_count);
1402 1453
1403 /* Test for being demotable */ 1454 /* Test for being demotable */
1404 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1455 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1405 gfs2_glock_hold(gl); 1456 list_move(&gl->gl_lru, &dispose);
1406 spin_unlock(&lru_lock); 1457 atomic_dec(&lru_count);
1407 spin_lock(&gl->gl_spin); 1458 nr--;
1408 may_demote = demote_ok(gl);
1409 if (may_demote) {
1410 handle_callback(gl, LM_ST_UNLOCKED, 0);
1411 nr--;
1412 }
1413 clear_bit(GLF_LOCK, &gl->gl_flags);
1414 smp_mb__after_clear_bit();
1415 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1416 gfs2_glock_put_nolock(gl);
1417 spin_unlock(&gl->gl_spin);
1418 spin_lock(&lru_lock);
1419 continue; 1459 continue;
1420 } 1460 }
1421 nr_skipped++; 1461
1422 list_add(&gl->gl_lru, &skipped); 1462 list_move(&gl->gl_lru, &skipped);
1423 set_bit(GLF_LRU, &gl->gl_flags);
1424 } 1463 }
1425 list_splice(&skipped, &lru_list); 1464 list_splice(&skipped, &lru_list);
1426 atomic_add(nr_skipped, &lru_count); 1465 if (!list_empty(&dispose))
1466 gfs2_dispose_glock_lru(&dispose);
1427 spin_unlock(&lru_lock); 1467 spin_unlock(&lru_lock);
1428out: 1468}
1469
1470static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1471 struct shrink_control *sc)
1472{
1473 if (sc->nr_to_scan) {
1474 if (!(sc->gfp_mask & __GFP_FS))
1475 return -1;
1476 gfs2_scan_glock_lru(sc->nr_to_scan);
1477 }
1478
1429 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1479 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
1430} 1480}
1431 1481
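
The reworked shrinker splits LRU handling in two: gfs2_scan_glock_lru() moves demotable glocks onto a private list, and gfs2_dispose_glock_lru() sorts that list by disk location before issuing demotions. A sketch of the underlying list_sort() idiom; struct item and its sector key are hypothetical stand-ins for the glock case:

/* Sketch only: sort a private list with a three-way comparator, as
 * gfs2_dispose_glock_lru() does with glock_cmp(). */
#include <linux/list_sort.h>

struct item {
        u64 sector;
        struct list_head list;
};

static int item_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct item *ia = list_entry(a, struct item, list);
        struct item *ib = list_entry(b, struct item, list);

        if (ia->sector > ib->sector)
                return 1;
        if (ia->sector < ib->sector)
                return -1;
        return 0;
}

static void sort_pending(struct list_head *pending)
{
        list_sort(NULL, pending, item_cmp);     /* ascending disk order */
}
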
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78d4184ffc7d..444b6503ebc4 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -322,8 +322,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
322 break; 322 break;
323 }; 323 };
324 324
325 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 325 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
326 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 326 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c373a24fedd9..156e42ec84ea 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 55 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 56 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 57 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -341,6 +340,7 @@ enum {
341 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
342 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
343 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
344}; 344};
345 345
346struct gfs2_inode { 346struct gfs2_inode {
@@ -357,6 +357,7 @@ struct gfs2_inode {
357 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
358 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
359 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
360 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
361 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
362 u32 i_entries; 363 u32 i_entries;
@@ -391,7 +392,6 @@ struct gfs2_revoke_replay {
391}; 392};
392 393
393enum { 394enum {
394 QDF_USER = 0,
395 QDF_CHANGE = 1, 395 QDF_CHANGE = 1,
396 QDF_LOCKED = 2, 396 QDF_LOCKED = 2,
397 QDF_REFRESH = 3, 397 QDF_REFRESH = 3,
@@ -403,7 +403,7 @@ struct gfs2_quota_data {
403 403
404 atomic_t qd_count; 404 atomic_t qd_count;
405 405
406 u32 qd_id; 406 struct kqid qd_id;
407 unsigned long qd_flags; /* QDF_... */ 407 unsigned long qd_flags; /* QDF_... */
408 408
409 s64 qd_change; 409 s64 qd_change;
@@ -641,6 +641,7 @@ struct gfs2_sbd {
641 wait_queue_head_t sd_glock_wait; 641 wait_queue_head_t sd_glock_wait;
642 atomic_t sd_glock_disposal; 642 atomic_t sd_glock_disposal;
643 struct completion sd_locking_init; 643 struct completion sd_locking_init;
644 struct completion sd_wdack;
644 struct delayed_work sd_control_work; 645 struct delayed_work sd_control_work;
645 646
646 /* Inode Stuff */ 647 /* Inode Stuff */
@@ -723,6 +724,7 @@ struct gfs2_sbd {
723 struct list_head sd_log_le_revoke; 724 struct list_head sd_log_le_revoke;
724 struct list_head sd_log_le_databuf; 725 struct list_head sd_log_le_databuf;
725 struct list_head sd_log_le_ordered; 726 struct list_head sd_log_le_ordered;
727 spinlock_t sd_ordered_lock;
726 728
727 atomic_t sd_log_thresh1; 729 atomic_t sd_log_thresh1;
728 atomic_t sd_log_thresh2; 730 atomic_t sd_log_thresh2;
@@ -758,10 +760,7 @@ struct gfs2_sbd {
758 unsigned int sd_replayed_blocks; 760 unsigned int sd_replayed_blocks;
759 761
760 /* For quiescing the filesystem */ 762 /* For quiescing the filesystem */
761
762 struct gfs2_holder sd_freeze_gh; 763 struct gfs2_holder sd_freeze_gh;
763 struct mutex sd_freeze_lock;
764 unsigned int sd_freeze_count;
765 764
766 char sd_fsname[GFS2_FSNAME_LEN]; 765 char sd_fsname[GFS2_FSNAME_LEN];
767 char sd_table_name[GFS2_FSNAME_LEN]; 766 char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b6f5698ef18..cc00bd1d1f87 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -368,10 +368,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
368 struct inode *inode) 368 struct inode *inode)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 371 (dip->i_inode.i_mode & S_ISUID) &&
372 !uid_eq(dip->i_inode.i_uid, GLOBAL_ROOT_UID)) {
372 if (S_ISDIR(inode->i_mode)) 373 if (S_ISDIR(inode->i_mode))
373 inode->i_mode |= S_ISUID; 374 inode->i_mode |= S_ISUID;
374 else if (dip->i_inode.i_uid != current_fsuid()) 375 else if (!uid_eq(dip->i_inode.i_uid, current_fsuid()))
375 inode->i_mode &= ~07111; 376 inode->i_mode &= ~07111;
376 inode->i_uid = dip->i_inode.i_uid; 377 inode->i_uid = dip->i_inode.i_uid;
377 } else 378 } else
@@ -447,7 +448,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
447 struct timespec tv = CURRENT_TIME; 448 struct timespec tv = CURRENT_TIME;
448 449
449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 450 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
450 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 451 gfs2_trans_add_meta(ip->i_gl, dibh);
451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 452 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 453 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
453 di = (struct gfs2_dinode *)dibh->b_data; 454 di = (struct gfs2_dinode *)dibh->b_data;
@@ -455,8 +456,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 456 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 457 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 458 di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 459 di->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 460 di->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
460 di->di_nlink = 0; 461 di->di_nlink = 0;
461 di->di_size = cpu_to_be64(ip->i_inode.i_size); 462 di->di_size = cpu_to_be64(ip->i_inode.i_size);
462 di->di_blocks = cpu_to_be64(1); 463 di->di_blocks = cpu_to_be64(1);
@@ -548,7 +549,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
548 if (error) 549 if (error)
549 return error; 550 return error;
550 551
551 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 552 error = gfs2_quota_lock(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
552 if (error) 553 if (error)
553 goto fail; 554 goto fail;
554 555
@@ -584,7 +585,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
584 if (error) 585 if (error)
585 goto fail_end_trans; 586 goto fail_end_trans;
586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 587 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
587 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 588 gfs2_trans_add_meta(ip->i_gl, dibh);
588 gfs2_dinode_out(ip, dibh->b_data); 589 gfs2_dinode_out(ip, dibh->b_data);
589 brelse(dibh); 590 brelse(dibh);
590 return 0; 591 return 0;
@@ -931,7 +932,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
931 if (error) 932 if (error)
932 goto out_brelse; 933 goto out_brelse;
933 934
934 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 935 gfs2_trans_add_meta(ip->i_gl, dibh);
935 inc_nlink(&ip->i_inode); 936 inc_nlink(&ip->i_inode);
936 ip->i_inode.i_ctime = CURRENT_TIME; 937 ip->i_inode.i_ctime = CURRENT_TIME;
937 ihold(inode); 938 ihold(inode);
@@ -978,8 +979,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 return -EPERM; 979 return -EPERM;
979 980
980 if ((dip->i_inode.i_mode & S_ISVTX) && 981 if ((dip->i_inode.i_mode & S_ISVTX) &&
981 dip->i_inode.i_uid != current_fsuid() && 982 !uid_eq(dip->i_inode.i_uid, current_fsuid()) &&
982 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 983 !uid_eq(ip->i_inode.i_uid, current_fsuid()) && !capable(CAP_FOWNER))
983 return -EPERM; 984 return -EPERM;
984 985
985 if (IS_APPEND(&dip->i_inode)) 986 if (IS_APPEND(&dip->i_inode))
@@ -1412,7 +1413,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1412 if (error) 1413 if (error)
1413 goto out_end_trans; 1414 goto out_end_trans;
1414 ip->i_inode.i_ctime = CURRENT_TIME; 1415 ip->i_inode.i_ctime = CURRENT_TIME;
1415 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1416 gfs2_trans_add_meta(ip->i_gl, dibh);
1416 gfs2_dinode_out(ip, dibh->b_data); 1417 gfs2_dinode_out(ip, dibh->b_data);
1417 brelse(dibh); 1418 brelse(dibh);
1418 } 1419 }
@@ -1580,7 +1581,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1580{ 1581{
1581 struct gfs2_inode *ip = GFS2_I(inode); 1582 struct gfs2_inode *ip = GFS2_I(inode);
1582 struct gfs2_sbd *sdp = GFS2_SB(inode); 1583 struct gfs2_sbd *sdp = GFS2_SB(inode);
1583 u32 ouid, ogid, nuid, ngid; 1584 kuid_t ouid, nuid;
1585 kgid_t ogid, ngid;
1584 int error; 1586 int error;
1585 1587
1586 ouid = inode->i_uid; 1588 ouid = inode->i_uid;
@@ -1588,16 +1590,17 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1588 nuid = attr->ia_uid; 1590 nuid = attr->ia_uid;
1589 ngid = attr->ia_gid; 1591 ngid = attr->ia_gid;
1590 1592
1591 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) 1593 if (!(attr->ia_valid & ATTR_UID) || uid_eq(ouid, nuid))
1592 ouid = nuid = NO_QUOTA_CHANGE; 1594 ouid = nuid = NO_UID_QUOTA_CHANGE;
1593 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1595 if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
1594 ogid = ngid = NO_QUOTA_CHANGE; 1596 ogid = ngid = NO_GID_QUOTA_CHANGE;
1595 1597
1596 error = gfs2_quota_lock(ip, nuid, ngid); 1598 error = gfs2_quota_lock(ip, nuid, ngid);
1597 if (error) 1599 if (error)
1598 return error; 1600 return error;
1599 1601
1600 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1602 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1603 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1601 error = gfs2_quota_check(ip, nuid, ngid); 1604 error = gfs2_quota_check(ip, nuid, ngid);
1602 if (error) 1605 if (error)
1603 goto out_gunlock_q; 1606 goto out_gunlock_q;
@@ -1611,7 +1614,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1611 if (error) 1614 if (error)
1612 goto out_end_trans; 1615 goto out_end_trans;
1613 1616
1614 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1617 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1618 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1615 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1619 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1616 gfs2_quota_change(ip, -blocks, ouid, ogid); 1620 gfs2_quota_change(ip, -blocks, ouid, ogid);
1617 gfs2_quota_change(ip, blocks, nuid, ngid); 1621 gfs2_quota_change(ip, blocks, nuid, ngid);
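
The gfs2 hunks above adopt the kuid_t/kgid_t idiom: raw integers only at the on-disk boundary, uid_eq()/gid_eq() and i_uid_read()/i_gid_read() everywhere else. A short sketch of both sides; the helper names are hypothetical:

/* Sketch only: namespace-aware ownership check and on-disk conversion. */
static bool may_chattr(const struct inode *inode)
{
        return uid_eq(current_fsuid(), inode->i_uid) || capable(CAP_FOWNER);
}

static void dinode_uid_out(struct gfs2_dinode *di, const struct inode *inode)
{
        di->di_uid = cpu_to_be32(i_uid_read(inode));
        di->di_gid = cpu_to_be32(i_gid_read(inode));
}
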
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8dad6b093716..9802de0f85e6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -241,6 +241,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
241 241
242static void gfs2_reverse_hex(char *c, u64 value) 242static void gfs2_reverse_hex(char *c, u64 value)
243{ 243{
244 *c = '0';
244 while (value) { 245 while (value) {
245 *c-- = hex_asc[value & 0x0f]; 246 *c-- = hex_asc[value & 0x0f];
246 value >>= 4; 247 value >>= 4;
@@ -280,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
280{ 281{
281 struct gfs2_sbd *sdp = gl->gl_sbd; 282 struct gfs2_sbd *sdp = gl->gl_sbd;
282 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 283 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
284 int lvb_needs_unlock = 0;
283 int error; 285 int error;
284 286
285 if (gl->gl_lksb.sb_lkid == 0) { 287 if (gl->gl_lksb.sb_lkid == 0) {
@@ -293,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
293 gfs2_update_request_times(gl); 295 gfs2_update_request_times(gl);
294 296
295 /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 297 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
298
299 if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
300 lvb_needs_unlock = 1;
301
296 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 302 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
297 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 303 !lvb_needs_unlock) {
298 gfs2_glock_free(gl); 304 gfs2_glock_free(gl);
299 return; 305 return;
300 } 306 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
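
The log.c hunk above moves ordered writeback from per-buffer tracking to per-inode tracking: sd_log_le_ordered now holds inodes, ip_cmp() sorts them by disk address (i_no_addr) via list_sort(), and the new sd_ordered_lock is dropped around the blocking filemap_fdatawrite()/filemap_fdatawait() calls. The sketch below is a userspace analogue of that sort-then-flush walk, using simplified stand-in types (ordered_inode, dirty_pages) rather than GFS2 structures.

/* Userspace analogue of the ordered-write walk above: sort by disk
 * address, then drop the lock around the blocking flush so other
 * threads can still queue entries while I/O is in flight.
 * All names here are illustrative, not GFS2 symbols. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ordered_inode {
	unsigned long long no_addr;  /* disk address, the sort key */
	int dirty_pages;             /* stands in for mapping->nrpages */
};

static struct ordered_inode *ordered[8];
static size_t nordered;
static pthread_mutex_t ordered_lock = PTHREAD_MUTEX_INITIALIZER;

/* Same shape as ip_cmp() above: order by address so writeback is
 * roughly sequential on disk. */
static int cmp_addr(const void *a, const void *b)
{
	const struct ordered_inode *ia = *(struct ordered_inode *const *)a;
	const struct ordered_inode *ib = *(struct ordered_inode *const *)b;

	if (ia->no_addr < ib->no_addr) return -1;
	if (ia->no_addr > ib->no_addr) return 1;
	return 0;
}

static void ordered_write(void)
{
	size_t i;

	pthread_mutex_lock(&ordered_lock);
	qsort(ordered, nordered, sizeof(ordered[0]), cmp_addr);
	for (i = 0; i < nordered; i++) {
		if (ordered[i]->dirty_pages == 0)
			continue;
		/* Drop the lock around the blocking flush, as the GFS2
		 * code drops sd_ordered_lock around filemap_fdatawrite(). */
		pthread_mutex_unlock(&ordered_lock);
		printf("writeback inode at block %llu\n", ordered[i]->no_addr);
		ordered[i]->dirty_pages = 0;
		pthread_mutex_lock(&ordered_lock);
	}
	pthread_mutex_unlock(&ordered_lock);
}

int main(void)
{
	static struct ordered_inode a = { 42, 1 }, b = { 7, 1 };

	ordered[nordered++] = &a;
	ordered[nordered++] = &b;
	ordered_write();   /* flushes block 7, then block 42 */
	return 0;
}
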
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
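
gfs2_ordered_add_inode() above uses a double-checked pattern: an unlocked test_bit(GIF_ORDERED) fast path, then sd_ordered_lock plus test_and_set_bit() so the list_add() happens at most once per inode. Below is a minimal userspace analogue with C11 atomics; all names are illustrative, not kernel symbols.

/* Double-checked "add once" pattern, as in gfs2_ordered_add_inode():
 * cheap unlocked test first, then lock + atomic exchange so the list
 * insertion happens exactly once.  Userspace sketch, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct tracked {
	atomic_bool queued;        /* plays the role of GIF_ORDERED */
	struct tracked *next;
};

static struct tracked *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void add_once(struct tracked *t)
{
	/* Fast path: already queued, skip the lock entirely. */
	if (atomic_load(&t->queued))
		return;

	pthread_mutex_lock(&queue_lock);
	/* Re-check under the lock: only the thread that flips the flag
	 * from false to true performs the insertion. */
	if (!atomic_exchange(&t->queued, true)) {
		t->next = queue_head;
		queue_head = t;
	}
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	static struct tracked a;

	add_once(&a);
	add_once(&a);              /* second call is a no-op */
	printf("queued once: %d\n", queue_head == &a && a.next == NULL);
	return 0;
}
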
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1595a3..a5055977a214 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * The log lock must be held when calling this function 38 * The log lock must be held when calling this function
39 */ 39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 40void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{ 41{
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 43
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
388 return page; 388 return page;
389} 389}
390 390
391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
392{
393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr;
395
396 tr = current->journal_info;
397 tr->tr_touched = 1;
398 if (!list_empty(&bd->bd_list))
399 return;
400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
403 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
404 printk(KERN_ERR
405 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
406 (unsigned long long)bd->bd_bh->b_blocknr);
407 BUG();
408 }
409 gfs2_pin(sdp, bd->bd_bh);
410 mh->__pad0 = cpu_to_be64(0);
411 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
412 sdp->sd_log_num_buf++;
413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
414 tr->tr_num_buf_new++;
415}
416
417static void gfs2_check_magic(struct buffer_head *bh) 391static void gfs2_check_magic(struct buffer_head *bh)
418{ 392{
419 void *kaddr; 393 void *kaddr;
@@ -600,20 +574,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 574 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
601} 575}
602 576
603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
604{
605 struct gfs2_glock *gl = bd->bd_gl;
606 struct gfs2_trans *tr;
607
608 tr = current->journal_info;
609 tr->tr_touched = 1;
610 tr->tr_num_revoke++;
611 sdp->sd_log_num_revoke++;
612 atomic_inc(&gl->gl_revokes);
613 set_bit(GLF_LFLUSH, &gl->gl_flags);
614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
615}
616
617static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 577static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
618{ 578{
619 struct gfs2_meta_header *mh; 579 struct gfs2_meta_header *mh;
@@ -749,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
749} 709}
750 710
751/** 711/**
752 * databuf_lo_add - Add a databuf to the transaction.
753 *
754 * This is used in two distinct cases:
755 * i) In ordered write mode
756 * We put the data buffer on a list so that we can ensure that its
757 * synced to disk at the right time
758 * ii) In journaled data mode
759 * We need to journal the data block in the same way as metadata in
760 * the functions above. The difference is that here we have a tag
761 * which is two __be64's being the block number (as per meta data)
762 * and a flag which says whether the data block needs escaping or
763 * not. This means we need a new log entry for each 251 or so data
764 * blocks, which isn't an enormous overhead but twice as much as
765 * for normal metadata blocks.
766 */
767static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
768{
769 struct gfs2_trans *tr = current->journal_info;
770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
771 struct gfs2_inode *ip = GFS2_I(mapping->host);
772
773 if (tr)
774 tr->tr_touched = 1;
775 if (!list_empty(&bd->bd_list))
776 return;
777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
779 if (gfs2_is_jdata(ip)) {
780 gfs2_pin(sdp, bd->bd_bh);
781 tr->tr_num_databuf_new++;
782 sdp->sd_log_num_databuf++;
783 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
784 } else {
785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
786 }
787}
788
789/**
790 * databuf_lo_before_commit - Scan the data buffers, writing as we go 712 * databuf_lo_before_commit - Scan the data buffers, writing as we go
791 * 713 *
792 */ 714 */
@@ -885,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
885 807
886 808
887const struct gfs2_log_operations gfs2_buf_lops = { 809const struct gfs2_log_operations gfs2_buf_lops = {
888 .lo_add = buf_lo_add,
889 .lo_before_commit = buf_lo_before_commit, 810 .lo_before_commit = buf_lo_before_commit,
890 .lo_after_commit = buf_lo_after_commit, 811 .lo_after_commit = buf_lo_after_commit,
891 .lo_before_scan = buf_lo_before_scan, 812 .lo_before_scan = buf_lo_before_scan,
@@ -895,7 +816,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
895}; 816};
896 817
897const struct gfs2_log_operations gfs2_revoke_lops = { 818const struct gfs2_log_operations gfs2_revoke_lops = {
898 .lo_add = revoke_lo_add,
899 .lo_before_commit = revoke_lo_before_commit, 819 .lo_before_commit = revoke_lo_before_commit,
900 .lo_after_commit = revoke_lo_after_commit, 820 .lo_after_commit = revoke_lo_after_commit,
901 .lo_before_scan = revoke_lo_before_scan, 821 .lo_before_scan = revoke_lo_before_scan,
@@ -909,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
909}; 829};
910 830
911const struct gfs2_log_operations gfs2_databuf_lops = { 831const struct gfs2_log_operations gfs2_databuf_lops = {
912 .lo_add = databuf_lo_add,
913 .lo_before_commit = databuf_lo_before_commit, 832 .lo_before_commit = databuf_lo_before_commit,
914 .lo_after_commit = databuf_lo_after_commit, 833 .lo_after_commit = databuf_lo_after_commit,
915 .lo_scan_elements = databuf_lo_scan_elements, 834 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 954a330585f4..ba77b7da8325 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -29,6 +29,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); 31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
32extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
32 33
33static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 34static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
34{ 35{
@@ -46,19 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
46 return limit; 47 return limit;
47} 48}
48 49
49static inline void lops_init_le(struct gfs2_bufdata *bd,
50 const struct gfs2_log_operations *lops)
51{
52 INIT_LIST_HEAD(&bd->bd_list);
53 bd->bd_ops = lops;
54}
55
56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
57{
58 if (bd->bd_ops->lo_add)
59 bd->bd_ops->lo_add(sdp, bd);
60}
61
62static inline void lops_before_commit(struct gfs2_sbd *sdp) 50static inline void lops_before_commit(struct gfs2_sbd *sdp)
63{ 51{
64 int x; 52 int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d96b27e..b059bbb5059e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
271 return 0; 271 return 0;
272} 272}
273 273
274/**
275 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
276 * @gl: the glock the buffer belongs to
277 * @bh: The buffer to be attached to
278 * @meta: Flag to indicate whether its metadata or not
279 */
280
281void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 int meta)
283{
284 struct gfs2_bufdata *bd;
285
286 if (meta)
287 lock_page(bh->b_page);
288
289 if (bh->b_private) {
290 if (meta)
291 unlock_page(bh->b_page);
292 return;
293 }
294
295 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
296 bd->bd_bh = bh;
297 bd->bd_gl = gl;
298
299 if (meta)
300 lops_init_le(bd, &gfs2_buf_lops);
301 else
302 lops_init_le(bd, &gfs2_databuf_lops);
303 bh->b_private = bd;
304
305 if (meta)
306 unlock_page(bh->b_page);
307}
308
309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 274void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
310{ 275{
311 struct address_space *mapping = bh->b_page->mapping; 276 struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b07a7c..0d4c843b6f8e 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); 57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
58 58
59void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
60 int meta);
61
62void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 59void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
63 int meta); 60 int meta);
64 61
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554edb8f2..1b612be4b873 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
81 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
82 atomic_set(&sdp->sd_glock_disposal, 0); 82 atomic_set(&sdp->sd_glock_disposal, 0);
83 init_completion(&sdp->sd_locking_init); 83 init_completion(&sdp->sd_locking_init);
84 init_completion(&sdp->sd_wdack);
84 spin_lock_init(&sdp->sd_statfs_spin); 85 spin_lock_init(&sdp->sd_statfs_spin);
85 86
86 spin_lock_init(&sdp->sd_rindex_spin); 87 spin_lock_init(&sdp->sd_rindex_spin);
@@ -102,6 +103,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 103 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 104 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 105 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
106 spin_lock_init(&sdp->sd_ordered_lock);
105 107
106 init_waitqueue_head(&sdp->sd_log_waitq); 108 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 109 init_waitqueue_head(&sdp->sd_logd_waitq);
@@ -115,8 +117,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
115 117
116 INIT_LIST_HEAD(&sdp->sd_revoke_list); 118 INIT_LIST_HEAD(&sdp->sd_revoke_list);
117 119
118 mutex_init(&sdp->sd_freeze_lock);
119
120 return sdp; 120 return sdp;
121} 121}
122 122
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ae55e248c3b7..c7c840e916f8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -65,13 +65,10 @@
65#include "inode.h" 65#include "inode.h"
66#include "util.h" 66#include "util.h"
67 67
68#define QUOTA_USER 1
69#define QUOTA_GROUP 0
70
71struct gfs2_quota_change_host { 68struct gfs2_quota_change_host {
72 u64 qc_change; 69 u64 qc_change;
73 u32 qc_flags; /* GFS2_QCF_... */ 70 u32 qc_flags; /* GFS2_QCF_... */
74 u32 qc_id; 71 struct kqid qc_id;
75}; 72};
76 73
77static LIST_HEAD(qd_lru_list); 74static LIST_HEAD(qd_lru_list);
@@ -120,17 +117,24 @@ out:
120 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 117 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
121} 118}
122 119
120static u64 qd2index(struct gfs2_quota_data *qd)
121{
122 struct kqid qid = qd->qd_id;
 123 return (2 * (u64)from_kqid(&init_user_ns, qid)) +
 124 ((qid.type == USRQUOTA) ? 0 : 1);
125}
126
123static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
124{ 128{
125 u64 offset; 129 u64 offset;
126 130
127 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); 131 offset = qd2index(qd);
128 offset *= sizeof(struct gfs2_quota); 132 offset *= sizeof(struct gfs2_quota);
129 133
130 return offset; 134 return offset;
131} 135}
132 136
133static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, 137static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
134 struct gfs2_quota_data **qdp) 138 struct gfs2_quota_data **qdp)
135{ 139{
136 struct gfs2_quota_data *qd; 140 struct gfs2_quota_data *qd;
@@ -141,13 +145,11 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
141 return -ENOMEM; 145 return -ENOMEM;
142 146
143 atomic_set(&qd->qd_count, 1); 147 atomic_set(&qd->qd_count, 1);
144 qd->qd_id = id; 148 qd->qd_id = qid;
145 if (user)
146 set_bit(QDF_USER, &qd->qd_flags);
147 qd->qd_slot = -1; 149 qd->qd_slot = -1;
148 INIT_LIST_HEAD(&qd->qd_reclaim); 150 INIT_LIST_HEAD(&qd->qd_reclaim);
149 151
150 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 152 error = gfs2_glock_get(sdp, qd2index(qd),
151 &gfs2_quota_glops, CREATE, &qd->qd_gl); 153 &gfs2_quota_glops, CREATE, &qd->qd_gl);
152 if (error) 154 if (error)
153 goto fail; 155 goto fail;
@@ -161,7 +163,7 @@ fail:
161 return error; 163 return error;
162} 164}
163 165
164static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, 166static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
165 struct gfs2_quota_data **qdp) 167 struct gfs2_quota_data **qdp)
166{ 168{
167 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 169 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
@@ -173,8 +175,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
173 found = 0; 175 found = 0;
174 spin_lock(&qd_lru_lock); 176 spin_lock(&qd_lru_lock);
175 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 177 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
176 if (qd->qd_id == id && 178 if (qid_eq(qd->qd_id, qid)) {
177 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
178 if (!atomic_read(&qd->qd_count) && 179 if (!atomic_read(&qd->qd_count) &&
179 !list_empty(&qd->qd_reclaim)) { 180 !list_empty(&qd->qd_reclaim)) {
180 /* Remove it from reclaim list */ 181 /* Remove it from reclaim list */
@@ -208,7 +209,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
208 return 0; 209 return 0;
209 } 210 }
210 211
211 error = qd_alloc(sdp, user, id, &new_qd); 212 error = qd_alloc(sdp, qid, &new_qd);
212 if (error) 213 if (error)
213 return error; 214 return error;
214 } 215 }
@@ -458,12 +459,12 @@ static void qd_unlock(struct gfs2_quota_data *qd)
458 qd_put(qd); 459 qd_put(qd);
459} 460}
460 461
461static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, 462static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
462 struct gfs2_quota_data **qdp) 463 struct gfs2_quota_data **qdp)
463{ 464{
464 int error; 465 int error;
465 466
466 error = qd_get(sdp, user, id, qdp); 467 error = qd_get(sdp, qid, qdp);
467 if (error) 468 if (error)
468 return error; 469 return error;
469 470
@@ -491,7 +492,7 @@ static void qdsb_put(struct gfs2_quota_data *qd)
491 qd_put(qd); 492 qd_put(qd);
492} 493}
493 494
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 495int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
495{ 496{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 497 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_quota_data **qd; 498 struct gfs2_quota_data **qd;
@@ -512,28 +513,30 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
512 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 513 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
513 return 0; 514 return 0;
514 515
515 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 516 error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
516 if (error) 517 if (error)
517 goto out; 518 goto out;
518 ip->i_res->rs_qa_qd_num++; 519 ip->i_res->rs_qa_qd_num++;
519 qd++; 520 qd++;
520 521
521 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 522 error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
522 if (error) 523 if (error)
523 goto out; 524 goto out;
524 ip->i_res->rs_qa_qd_num++; 525 ip->i_res->rs_qa_qd_num++;
525 qd++; 526 qd++;
526 527
527 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 528 if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
528 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 529 !uid_eq(uid, ip->i_inode.i_uid)) {
530 error = qdsb_get(sdp, make_kqid_uid(uid), qd);
529 if (error) 531 if (error)
530 goto out; 532 goto out;
531 ip->i_res->rs_qa_qd_num++; 533 ip->i_res->rs_qa_qd_num++;
532 qd++; 534 qd++;
533 } 535 }
534 536
535 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { 537 if (!gid_eq(gid, NO_GID_QUOTA_CHANGE) &&
536 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 538 !gid_eq(gid, ip->i_inode.i_gid)) {
539 error = qdsb_get(sdp, make_kqid_gid(gid), qd);
537 if (error) 540 if (error)
538 goto out; 541 goto out;
539 ip->i_res->rs_qa_qd_num++; 542 ip->i_res->rs_qa_qd_num++;
@@ -567,18 +570,10 @@ static int sort_qd(const void *a, const void *b)
567 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; 570 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
568 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; 571 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
569 572
570 if (!test_bit(QDF_USER, &qd_a->qd_flags) != 573 if (qid_lt(qd_a->qd_id, qd_b->qd_id))
571 !test_bit(QDF_USER, &qd_b->qd_flags)) {
572 if (test_bit(QDF_USER, &qd_a->qd_flags))
573 return -1;
574 else
575 return 1;
576 }
577 if (qd_a->qd_id < qd_b->qd_id)
578 return -1; 574 return -1;
579 if (qd_a->qd_id > qd_b->qd_id) 575 if (qid_lt(qd_b->qd_id, qd_a->qd_id))
580 return 1; 576 return 1;
581
582 return 0; 577 return 0;
583} 578}
584 579
@@ -590,14 +585,14 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
590 s64 x; 585 s64 x;
591 586
592 mutex_lock(&sdp->sd_quota_mutex); 587 mutex_lock(&sdp->sd_quota_mutex);
593 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); 588 gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
594 589
595 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { 590 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
596 qc->qc_change = 0; 591 qc->qc_change = 0;
597 qc->qc_flags = 0; 592 qc->qc_flags = 0;
598 if (test_bit(QDF_USER, &qd->qd_flags)) 593 if (qd->qd_id.type == USRQUOTA)
599 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); 594 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
600 qc->qc_id = cpu_to_be32(qd->qd_id); 595 qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id));
601 } 596 }
602 597
603 x = be64_to_cpu(qc->qc_change) + change; 598 x = be64_to_cpu(qc->qc_change) + change;
@@ -726,7 +721,7 @@ get_a_page:
726 goto unlock_out; 721 goto unlock_out;
727 } 722 }
728 723
729 gfs2_trans_add_bh(ip->i_gl, bh, 0); 724 gfs2_trans_add_meta(ip->i_gl, bh);
730 725
731 kaddr = kmap_atomic(page); 726 kaddr = kmap_atomic(page);
732 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
@@ -925,7 +920,7 @@ fail:
925 return error; 920 return error;
926} 921}
927 922
928int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 923int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
929{ 924{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 925 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_quota_data *qd; 926 struct gfs2_quota_data *qd;
@@ -1040,13 +1035,13 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1040 1035
1041 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", 1036 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
1042 sdp->sd_fsname, type, 1037 sdp->sd_fsname, type,
1043 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1038 (qd->qd_id.type == USRQUOTA) ? "user" : "group",
1044 qd->qd_id); 1039 from_kqid(&init_user_ns, qd->qd_id));
1045 1040
1046 return 0; 1041 return 0;
1047} 1042}
1048 1043
1049int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1044int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1050{ 1045{
1051 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1046 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1052 struct gfs2_quota_data *qd; 1047 struct gfs2_quota_data *qd;
@@ -1063,8 +1058,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1063 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1058 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1064 qd = ip->i_res->rs_qa_qd[x]; 1059 qd = ip->i_res->rs_qa_qd[x];
1065 1060
1066 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1061 if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1067 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1062 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1068 continue; 1063 continue;
1069 1064
1070 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1065 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
@@ -1074,10 +1069,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1074 1069
1075 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1070 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1076 print_message(qd, "exceeded"); 1071 print_message(qd, "exceeded");
1077 quota_send_warning(make_kqid(&init_user_ns, 1072 quota_send_warning(qd->qd_id,
1078 test_bit(QDF_USER, &qd->qd_flags) ?
1079 USRQUOTA : GRPQUOTA,
1080 qd->qd_id),
1081 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1073 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1082 1074
1083 error = -EDQUOT; 1075 error = -EDQUOT;
@@ -1087,10 +1079,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1087 time_after_eq(jiffies, qd->qd_last_warn + 1079 time_after_eq(jiffies, qd->qd_last_warn +
1088 gfs2_tune_get(sdp, 1080 gfs2_tune_get(sdp,
1089 gt_quota_warn_period) * HZ)) { 1081 gt_quota_warn_period) * HZ)) {
1090 quota_send_warning(make_kqid(&init_user_ns, 1082 quota_send_warning(qd->qd_id,
1091 test_bit(QDF_USER, &qd->qd_flags) ?
1092 USRQUOTA : GRPQUOTA,
1093 qd->qd_id),
1094 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1083 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1095 error = print_message(qd, "warning"); 1084 error = print_message(qd, "warning");
1096 qd->qd_last_warn = jiffies; 1085 qd->qd_last_warn = jiffies;
@@ -1101,7 +1090,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1101} 1090}
1102 1091
1103void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1092void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1104 u32 uid, u32 gid) 1093 kuid_t uid, kgid_t gid)
1105{ 1094{
1106 struct gfs2_quota_data *qd; 1095 struct gfs2_quota_data *qd;
1107 unsigned int x; 1096 unsigned int x;
@@ -1114,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1114 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1103 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1115 qd = ip->i_res->rs_qa_qd[x]; 1104 qd = ip->i_res->rs_qa_qd[x];
1116 1105
1117 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1106 if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1118 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1107 qid_eq(qd->qd_id, make_kqid_gid(gid))) {
1119 do_qc(qd, change); 1108 do_qc(qd, change);
1120 } 1109 }
1121 } 1110 }
@@ -1170,13 +1159,13 @@ static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
1170 return gfs2_quota_sync(sb, type); 1159 return gfs2_quota_sync(sb, type);
1171} 1160}
1172 1161
1173int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) 1162int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid)
1174{ 1163{
1175 struct gfs2_quota_data *qd; 1164 struct gfs2_quota_data *qd;
1176 struct gfs2_holder q_gh; 1165 struct gfs2_holder q_gh;
1177 int error; 1166 int error;
1178 1167
1179 error = qd_get(sdp, user, id, &qd); 1168 error = qd_get(sdp, qid, &qd);
1180 if (error) 1169 if (error)
1181 return error; 1170 return error;
1182 1171
@@ -1194,7 +1183,9 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
1194 1183
1195 qc->qc_change = be64_to_cpu(str->qc_change); 1184 qc->qc_change = be64_to_cpu(str->qc_change);
1196 qc->qc_flags = be32_to_cpu(str->qc_flags); 1185 qc->qc_flags = be32_to_cpu(str->qc_flags);
1197 qc->qc_id = be32_to_cpu(str->qc_id); 1186 qc->qc_id = make_kqid(&init_user_ns,
1187 (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA,
1188 be32_to_cpu(str->qc_id));
1198} 1189}
1199 1190
1200int gfs2_quota_init(struct gfs2_sbd *sdp) 1191int gfs2_quota_init(struct gfs2_sbd *sdp)
@@ -1257,8 +1248,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1257 if (!qc.qc_change) 1248 if (!qc.qc_change)
1258 continue; 1249 continue;
1259 1250
1260 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER), 1251 error = qd_alloc(sdp, qc.qc_id, &qd);
1261 qc.qc_id, &qd);
1262 if (error) { 1252 if (error) {
1263 brelse(bh); 1253 brelse(bh);
1264 goto fail; 1254 goto fail;
@@ -1485,21 +1475,17 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1485 struct gfs2_quota_data *qd; 1475 struct gfs2_quota_data *qd;
1486 struct gfs2_holder q_gh; 1476 struct gfs2_holder q_gh;
1487 int error; 1477 int error;
1488 int type;
1489 1478
1490 memset(fdq, 0, sizeof(struct fs_disk_quota)); 1479 memset(fdq, 0, sizeof(struct fs_disk_quota));
1491 1480
1492 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1481 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1493 return -ESRCH; /* Crazy XFS error code */ 1482 return -ESRCH; /* Crazy XFS error code */
1494 1483
1495 if (qid.type == USRQUOTA) 1484 if ((qid.type != USRQUOTA) &&
1496 type = QUOTA_USER; 1485 (qid.type != GRPQUOTA))
1497 else if (qid.type == GRPQUOTA)
1498 type = QUOTA_GROUP;
1499 else
1500 return -EINVAL; 1486 return -EINVAL;
1501 1487
1502 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1488 error = qd_get(sdp, qid, &qd);
1503 if (error) 1489 if (error)
1504 return error; 1490 return error;
1505 error = do_glock(qd, FORCE, &q_gh); 1491 error = do_glock(qd, FORCE, &q_gh);
@@ -1508,8 +1494,8 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1508 1494
1509 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1495 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
1510 fdq->d_version = FS_DQUOT_VERSION; 1496 fdq->d_version = FS_DQUOT_VERSION;
1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1497 fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1512 fdq->d_id = from_kqid(&init_user_ns, qid); 1498 fdq->d_id = from_kqid_munged(current_user_ns(), qid);
1513 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; 1499 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1514 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; 1500 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1515 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; 1501 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
@@ -1535,32 +1521,18 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1535 int alloc_required; 1521 int alloc_required;
1536 loff_t offset; 1522 loff_t offset;
1537 int error; 1523 int error;
1538 int type;
1539 1524
1540 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1525 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1541 return -ESRCH; /* Crazy XFS error code */ 1526 return -ESRCH; /* Crazy XFS error code */
1542 1527
1543 switch(qid.type) { 1528 if ((qid.type != USRQUOTA) &&
1544 case USRQUOTA: 1529 (qid.type != GRPQUOTA))
1545 type = QUOTA_USER;
1546 if (fdq->d_flags != FS_USER_QUOTA)
1547 return -EINVAL;
1548 break;
1549 case GRPQUOTA:
1550 type = QUOTA_GROUP;
1551 if (fdq->d_flags != FS_GROUP_QUOTA)
1552 return -EINVAL;
1553 break;
1554 default:
1555 return -EINVAL; 1530 return -EINVAL;
1556 }
1557 1531
1558 if (fdq->d_fieldmask & ~GFS2_FIELDMASK) 1532 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1559 return -EINVAL; 1533 return -EINVAL;
1560 if (fdq->d_id != from_kqid(&init_user_ns, qid))
1561 return -EINVAL;
1562 1534
1563 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1535 error = qd_get(sdp, qid, &qd);
1564 if (error) 1536 if (error)
1565 return error; 1537 return error;
1566 1538
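
The quota.c hunks above convert the quota code from a (user flag, u32 id) pair to struct kqid: qd_alloc()/qd_get() take a kqid, lookups use qid_eq()/qid_lt(), and qd2index() interleaves user and group records (user ids at even indices, group ids at odd ones) before qd2offset() scales by the record size. Note that the conditional in qd2index() needs its own parentheses, since '+' binds tighter than '?:'. Below is a userspace sketch of that index arithmetic with a toy qid type standing in for the kernel API.

/* Simplified model of qd2index()/qd2offset(): interleave user and group
 * quota entries in one file, user ids on even indices, group ids on odd.
 * "qid" here is a toy struct, not the kernel's struct kqid. */
#include <stdint.h>
#include <stdio.h>

enum qtype { QUSER = 0, QGROUP = 1 };

struct qid {
	enum qtype type;
	uint32_t id;
};

#define QUOTA_RECORD_SIZE 64u   /* stands in for sizeof(struct gfs2_quota) */

static uint64_t qid_to_index(struct qid q)
{
	/* Parentheses around the conditional matter: '+' binds tighter
	 * than '?:', so leaving them out breaks the arithmetic. */
	return 2 * (uint64_t)q.id + ((q.type == QUSER) ? 0 : 1);
}

static uint64_t qid_to_offset(struct qid q)
{
	return qid_to_index(q) * QUOTA_RECORD_SIZE;
}

int main(void)
{
	struct qid u1000 = { QUSER, 1000 }, g1000 = { QGROUP, 1000 };

	printf("uid 1000 -> index %llu, offset %llu\n",
	       (unsigned long long)qid_to_index(u1000),
	       (unsigned long long)qid_to_offset(u1000));
	printf("gid 1000 -> index %llu, offset %llu\n",
	       (unsigned long long)qid_to_index(g1000),
	       (unsigned long long)qid_to_offset(g1000));
	return 0;
}
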
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index f25d98b87904..4f5e6e44ed83 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -14,20 +14,21 @@ struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control; 15struct shrink_control;
16 16
17#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_UID_QUOTA_CHANGE INVALID_UID
18#define NO_GID_QUOTA_CHANGE INVALID_GID
18 19
19extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid); 20extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
20extern void gfs2_quota_unhold(struct gfs2_inode *ip); 21extern void gfs2_quota_unhold(struct gfs2_inode *ip);
21 22
22extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid); 23extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
23extern void gfs2_quota_unlock(struct gfs2_inode *ip); 24extern void gfs2_quota_unlock(struct gfs2_inode *ip);
24 25
25extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); 26extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
26extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 27extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
27 u32 uid, u32 gid); 28 kuid_t uid, kgid_t gid);
28 29
29extern int gfs2_quota_sync(struct super_block *sb, int type); 30extern int gfs2_quota_sync(struct super_block *sb, int type);
30extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 31extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
31 32
32extern int gfs2_quota_init(struct gfs2_sbd *sdp); 33extern int gfs2_quota_init(struct gfs2_sbd *sdp);
33extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 34extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
@@ -41,7 +42,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
41 int ret; 42 int ret;
42 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 43 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
43 return 0; 44 return 0;
44 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 45 ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
45 if (ret) 46 if (ret)
46 return ret; 47 return ret;
47 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 48 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
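
With the kuid_t/kgid_t prototypes above, callers pass NO_UID_QUOTA_CHANGE/NO_GID_QUOTA_CHANGE (INVALID_UID/INVALID_GID) instead of the old (u32)-1 when no extra id should be charged. The snippet below is a hedged sketch of that sentinel check, mirroring the uid_eq() tests in gfs2_quota_hold(); plain uid_t stands in for the kernel's kuid_t machinery.

/* Sentinel "no change" id, mirroring the NO_UID_QUOTA_CHANGE checks
 * above.  uid_t and -1 stand in for kuid_t and INVALID_UID. */
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

#define NO_UID_CHANGE ((uid_t)-1)

static bool charge_extra_uid(uid_t new_uid, uid_t current_uid)
{
	/* Charge an extra quota entry only when a real, different uid is
	 * being assigned (e.g. chown), as gfs2_quota_hold() does. */
	return new_uid != NO_UID_CHANGE && new_uid != current_uid;
}

int main(void)
{
	printf("%d %d %d\n",
	       charge_extra_uid(NO_UID_CHANGE, 1000), /* 0: no change */
	       charge_extra_uid(1000, 1000),          /* 0: same owner */
	       charge_extra_uid(2000, 1000));         /* 1: chown */
	return 0;
}
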
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 37ee061d899e..d1f51fd73f86 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
350 BUG_ON(len < chunk_size); 350 BUG_ON(len < chunk_size);
351 len -= chunk_size; 351 len -= chunk_size;
352 block = gfs2_rbm_to_block(&rbm); 352 block = gfs2_rbm_to_block(&rbm);
353 gfs2_rbm_from_block(&rbm, block + chunk_size); 353 if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
354 n_unaligned = 3; 354 n_unaligned = 0;
355 if (ptr)
356 break; 355 break;
356 }
357 if (ptr) {
358 n_unaligned = 3;
359 break;
360 }
357 n_unaligned = len & 3; 361 n_unaligned = len & 3;
358 } 362 }
359 363
@@ -557,22 +561,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
557 */ 561 */
558int gfs2_rs_alloc(struct gfs2_inode *ip) 562int gfs2_rs_alloc(struct gfs2_inode *ip)
559{ 563{
560 struct gfs2_blkreserv *res; 564 int error = 0;
561 565
566 down_write(&ip->i_rw_mutex);
562 if (ip->i_res) 567 if (ip->i_res)
563 return 0; 568 goto out;
564
565 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
566 if (!res)
567 return -ENOMEM;
568 569
569 RB_CLEAR_NODE(&res->rs_node); 570 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
571 if (!ip->i_res) {
572 error = -ENOMEM;
573 goto out;
574 }
570 575
571 down_write(&ip->i_rw_mutex); 576 RB_CLEAR_NODE(&ip->i_res->rs_node);
572 if (ip->i_res) 577out:
573 kmem_cache_free(gfs2_rsrv_cachep, res);
574 else
575 ip->i_res = res;
576 up_write(&ip->i_rw_mutex); 578 up_write(&ip->i_rw_mutex);
 577 return 0; 579 return error;
578} 580}
@@ -1255,7 +1257,7 @@ fail:
1255 1257
1256int gfs2_fitrim(struct file *filp, void __user *argp) 1258int gfs2_fitrim(struct file *filp, void __user *argp)
1257{ 1259{
1258 struct inode *inode = filp->f_dentry->d_inode; 1260 struct inode *inode = file_inode(filp);
1259 struct gfs2_sbd *sdp = GFS2_SB(inode); 1261 struct gfs2_sbd *sdp = GFS2_SB(inode);
1260 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1262 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1261 struct buffer_head *bh; 1263 struct buffer_head *bh;
@@ -1321,7 +1323,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1321 if (ret == 0) { 1323 if (ret == 0) {
1322 bh = rgd->rd_bits[0].bi_bh; 1324 bh = rgd->rd_bits[0].bi_bh;
1323 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1325 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1324 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1326 gfs2_trans_add_meta(rgd->rd_gl, bh);
1325 gfs2_rgrp_out(rgd, bh->b_data); 1327 gfs2_rgrp_out(rgd, bh->b_data);
1326 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1327 gfs2_trans_end(sdp); 1329 gfs2_trans_end(sdp);
@@ -1424,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1424 rs->rs_free = extlen; 1426 rs->rs_free = extlen;
1425 rs->rs_inum = ip->i_no_addr; 1427 rs->rs_inum = ip->i_no_addr;
1426 rs_insert(ip); 1428 rs_insert(ip);
1429 } else {
1430 if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1431 rgd->rd_last_alloc = 0;
1427 } 1432 }
1428} 1433}
1429 1434
@@ -1963,14 +1968,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1963 1968
1964 *n = 1; 1969 *n = 1;
1965 block = gfs2_rbm_to_block(rbm); 1970 block = gfs2_rbm_to_block(rbm);
1966 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1971 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
1967 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1968 block++; 1973 block++;
1969 while (*n < elen) { 1974 while (*n < elen) {
1970 ret = gfs2_rbm_from_block(&pos, block); 1975 ret = gfs2_rbm_from_block(&pos, block);
1971 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1972 break; 1977 break;
1973 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1978 gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
1974 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1975 (*n)++; 1980 (*n)++;
1976 block++; 1981 block++;
@@ -2009,7 +2014,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2009 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
2010 rbm.bi->bi_len); 2015 rbm.bi->bi_len);
2011 } 2016 }
2012 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2017 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
2013 gfs2_setbit(&rbm, false, new_state); 2018 gfs2_setbit(&rbm, false, new_state);
2014 } 2019 }
2015 2020
@@ -2152,7 +2157,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2152 if (error == 0) { 2157 if (error == 0) {
2153 struct gfs2_dinode *di = 2158 struct gfs2_dinode *di =
2154 (struct gfs2_dinode *)dibh->b_data; 2159 (struct gfs2_dinode *)dibh->b_data;
2155 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2160 gfs2_trans_add_meta(ip->i_gl, dibh);
2156 di->di_goal_meta = di->di_goal_data = 2161 di->di_goal_meta = di->di_goal_data =
2157 cpu_to_be64(ip->i_goal); 2162 cpu_to_be64(ip->i_goal);
2158 brelse(dibh); 2163 brelse(dibh);
@@ -2171,7 +2176,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2171 *generation = rbm.rgd->rd_igeneration++; 2176 *generation = rbm.rgd->rd_igeneration++;
2172 } 2177 }
2173 2178
2174 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2179 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2175 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2176 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2177 2182
@@ -2218,7 +2223,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2218 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2219 rgd->rd_free += blen; 2224 rgd->rd_free += blen;
2220 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2221 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2226 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2222 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2223 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2224 2229
@@ -2255,7 +2260,7 @@ void gfs2_unlink_di(struct inode *inode)
2255 if (!rgd) 2260 if (!rgd)
2256 return; 2261 return;
2257 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2258 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2263 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2259 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2260 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2261 update_rgrp_lvb_unlinked(rgd, 1); 2266 update_rgrp_lvb_unlinked(rgd, 1);
@@ -2276,7 +2281,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2276 rgd->rd_dinodes--; 2281 rgd->rd_dinodes--;
2277 rgd->rd_free++; 2282 rgd->rd_free++;
2278 2283
2279 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2284 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2280 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2281 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2282 update_rgrp_lvb_unlinked(rgd, -1); 2287 update_rgrp_lvb_unlinked(rgd, -1);
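
Besides switching every gfs2_trans_add_bh(..., 1) call to gfs2_trans_add_meta(), the rgrp.c hunks rework gfs2_rs_alloc() so the NULL check and the assignment of ip->i_res both happen with i_rw_mutex held for write, closing the window where two racing callers could each allocate a reservation. Below is a userspace sketch of that check-and-publish-under-one-write-lock shape, with stand-in types.

/* Lazy, race-free allocation of a per-inode reservation, in the spirit
 * of the reworked gfs2_rs_alloc(): the existence check and the publish
 * step share one write lock.  Userspace stand-ins only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct reservation { long free_blocks; };

struct toy_inode {
	pthread_rwlock_t rw;
	struct reservation *res;     /* NULL until first use */
};

static int rs_alloc(struct toy_inode *ip)
{
	int error = 0;

	pthread_rwlock_wrlock(&ip->rw);
	if (ip->res)
		goto out;                  /* someone else already did it */

	ip->res = calloc(1, sizeof(*ip->res));
	if (!ip->res)
		error = -1;                /* -ENOMEM in the kernel version */
out:
	pthread_rwlock_unlock(&ip->rw);
	return error;
}

int main(void)
{
	struct toy_inode ip = { PTHREAD_RWLOCK_INITIALIZER, NULL };

	printf("%d %d\n", rs_alloc(&ip), rs_alloc(&ip)); /* 0 0, one allocation */
	free(ip.res);
	return 0;
}
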
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d6488674d916..cab77b8ba84f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -500,7 +500,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
500 if (error) 500 if (error)
501 return; 501 return;
502 502
503 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
504 504
505 spin_lock(&sdp->sd_statfs_spin); 505 spin_lock(&sdp->sd_statfs_spin);
506 l_sc->sc_total += total; 506 l_sc->sc_total += total;
@@ -528,7 +528,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
530 530
531 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 531 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
532 532
533 spin_lock(&sdp->sd_statfs_spin); 533 spin_lock(&sdp->sd_statfs_spin);
534 m_sc->sc_total += l_sc->sc_total; 534 m_sc->sc_total += l_sc->sc_total;
@@ -539,7 +539,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
539 0, sizeof(struct gfs2_statfs_change)); 539 0, sizeof(struct gfs2_statfs_change));
540 spin_unlock(&sdp->sd_statfs_spin); 540 spin_unlock(&sdp->sd_statfs_spin);
541 541
542 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 542 gfs2_trans_add_meta(m_ip->i_gl, m_bh);
543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
544} 544}
545 545
@@ -663,54 +663,6 @@ out:
663 return error; 663 return error;
664} 664}
665 665
666/**
667 * gfs2_freeze_fs - freezes the file system
668 * @sdp: the file system
669 *
670 * This function flushes data and meta data for all machines by
671 * acquiring the transaction log exclusively. All journals are
672 * ensured to be in a clean state as well.
673 *
674 * Returns: errno
675 */
676
677int gfs2_freeze_fs(struct gfs2_sbd *sdp)
678{
679 int error = 0;
680
681 mutex_lock(&sdp->sd_freeze_lock);
682
683 if (!sdp->sd_freeze_count++) {
684 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
685 if (error)
686 sdp->sd_freeze_count--;
687 }
688
689 mutex_unlock(&sdp->sd_freeze_lock);
690
691 return error;
692}
693
694/**
695 * gfs2_unfreeze_fs - unfreezes the file system
696 * @sdp: the file system
697 *
698 * This function allows the file system to proceed by unlocking
699 * the exclusively held transaction lock. Other GFS2 nodes are
700 * now free to acquire the lock shared and go on with their lives.
701 *
702 */
703
704void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
705{
706 mutex_lock(&sdp->sd_freeze_lock);
707
708 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
709 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
710
711 mutex_unlock(&sdp->sd_freeze_lock);
712}
713
714void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 666void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
715{ 667{
716 struct gfs2_dinode *str = buf; 668 struct gfs2_dinode *str = buf;
@@ -721,8 +673,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
721 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 673 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
722 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 674 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
723 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 675 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
724 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 676 str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
725 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 677 str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
726 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 678 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
727 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 679 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
728 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 680 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -824,7 +776,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
824 776
825 ret = gfs2_meta_inode_buffer(ip, &bh); 777 ret = gfs2_meta_inode_buffer(ip, &bh);
826 if (ret == 0) { 778 if (ret == 0) {
827 gfs2_trans_add_bh(ip->i_gl, bh, 1); 779 gfs2_trans_add_meta(ip->i_gl, bh);
828 gfs2_dinode_out(ip, bh->b_data); 780 gfs2_dinode_out(ip, bh->b_data);
829 brelse(bh); 781 brelse(bh);
830 } 782 }
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
888 int error; 840 int error;
889 struct gfs2_jdesc *jd; 841 struct gfs2_jdesc *jd;
890 842
891 /* Unfreeze the filesystem, if we need to */
892
893 mutex_lock(&sdp->sd_freeze_lock);
894 if (sdp->sd_freeze_count)
895 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
896 mutex_unlock(&sdp->sd_freeze_lock);
897
898 /* No more recovery requests */ 843 /* No more recovery requests */
899 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 844 set_bit(SDF_NORECOVERY, &sdp->sd_flags);
900 smp_mb(); 845 smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
985 return -EINVAL; 930 return -EINVAL;
986 931
987 for (;;) { 932 for (;;) {
988 error = gfs2_freeze_fs(sdp); 933 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
989 if (!error) 934 if (!error)
990 break; 935 break;
991 936
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
1013 958
1014static int gfs2_unfreeze(struct super_block *sb) 959static int gfs2_unfreeze(struct super_block *sb)
1015{ 960{
1016 gfs2_unfreeze_fs(sb->s_fs_info); 961 struct gfs2_sbd *sdp = sb->s_fs_info;
962
963 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
1017 return 0; 964 return 0;
1018} 965}
1019 966
@@ -1429,7 +1376,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1429 if (error) 1376 if (error)
1430 return error; 1377 return error;
1431 1378
1432 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1379 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1433 if (error) 1380 if (error)
1434 return error; 1381 return error;
1435 1382
@@ -1577,6 +1524,7 @@ out:
1577 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1578 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1579 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1580 clear_inode(inode); 1528 clear_inode(inode);
1581 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1582 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a0464680af0b..90e3322ffa10 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct super_block *sb, int type); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
51
52extern struct file_system_type gfs2_fs_type; 49extern struct file_system_type gfs2_fs_type;
53extern struct file_system_type gfs2meta_fs_type; 50extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 51extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8056b7b7238e..aa5c48044966 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,39 +91,37 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
91 91
92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
93{ 93{
94 unsigned int count; 94 struct super_block *sb = sdp->sd_vfs;
95 95 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
96 mutex_lock(&sdp->sd_freeze_lock);
97 count = sdp->sd_freeze_count;
98 mutex_unlock(&sdp->sd_freeze_lock);
99 96
100 return snprintf(buf, PAGE_SIZE, "%u\n", count); 97 return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
101} 98}
102 99
103static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 100static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
104{ 101{
105 ssize_t ret = len; 102 int error;
106 int error = 0;
107 int n = simple_strtol(buf, NULL, 0); 103 int n = simple_strtol(buf, NULL, 0);
108 104
109 if (!capable(CAP_SYS_ADMIN)) 105 if (!capable(CAP_SYS_ADMIN))
110 return -EACCES; 106 return -EPERM;
111 107
112 switch (n) { 108 switch (n) {
113 case 0: 109 case 0:
114 gfs2_unfreeze_fs(sdp); 110 error = thaw_super(sdp->sd_vfs);
115 break; 111 break;
116 case 1: 112 case 1:
117 error = gfs2_freeze_fs(sdp); 113 error = freeze_super(sdp->sd_vfs);
118 break; 114 break;
119 default: 115 default:
120 ret = -EINVAL; 116 return -EINVAL;
121 } 117 }
122 118
123 if (error) 119 if (error) {
124 fs_warn(sdp, "freeze %d error %d", n, error); 120 fs_warn(sdp, "freeze %d error %d", n, error);
121 return error;
122 }
125 123
126 return ret; 124 return len;
127} 125}
128 126
129static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 127static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
@@ -135,7 +133,7 @@ static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
135static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 133static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
136{ 134{
137 if (!capable(CAP_SYS_ADMIN)) 135 if (!capable(CAP_SYS_ADMIN))
138 return -EACCES; 136 return -EPERM;
139 137
140 if (simple_strtol(buf, NULL, 0) != 1) 138 if (simple_strtol(buf, NULL, 0) != 1)
141 return -EINVAL; 139 return -EINVAL;
@@ -150,7 +148,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
150 size_t len) 148 size_t len)
151{ 149{
152 if (!capable(CAP_SYS_ADMIN)) 150 if (!capable(CAP_SYS_ADMIN))
153 return -EACCES; 151 return -EPERM;
154 152
155 if (simple_strtol(buf, NULL, 0) != 1) 153 if (simple_strtol(buf, NULL, 0) != 1)
156 return -EINVAL; 154 return -EINVAL;
@@ -163,7 +161,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
163 size_t len) 161 size_t len)
164{ 162{
165 if (!capable(CAP_SYS_ADMIN)) 163 if (!capable(CAP_SYS_ADMIN))
166 return -EACCES; 164 return -EPERM;
167 165
168 if (simple_strtol(buf, NULL, 0) != 1) 166 if (simple_strtol(buf, NULL, 0) != 1)
169 return -EINVAL; 167 return -EINVAL;
@@ -175,30 +173,40 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
175static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, 173static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
176 size_t len) 174 size_t len)
177{ 175{
176 struct kqid qid;
178 int error; 177 int error;
179 u32 id; 178 u32 id;
180 179
181 if (!capable(CAP_SYS_ADMIN)) 180 if (!capable(CAP_SYS_ADMIN))
182 return -EACCES; 181 return -EPERM;
183 182
184 id = simple_strtoul(buf, NULL, 0); 183 id = simple_strtoul(buf, NULL, 0);
185 184
186 error = gfs2_quota_refresh(sdp, 1, id); 185 qid = make_kqid(current_user_ns(), USRQUOTA, id);
186 if (!qid_valid(qid))
187 return -EINVAL;
188
189 error = gfs2_quota_refresh(sdp, qid);
187 return error ? error : len; 190 return error ? error : len;
188} 191}
189 192
190static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, 193static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
191 size_t len) 194 size_t len)
192{ 195{
196 struct kqid qid;
193 int error; 197 int error;
194 u32 id; 198 u32 id;
195 199
196 if (!capable(CAP_SYS_ADMIN)) 200 if (!capable(CAP_SYS_ADMIN))
197 return -EACCES; 201 return -EPERM;
198 202
199 id = simple_strtoul(buf, NULL, 0); 203 id = simple_strtoul(buf, NULL, 0);
200 204
201 error = gfs2_quota_refresh(sdp, 0, id); 205 qid = make_kqid(current_user_ns(), GRPQUOTA, id);
206 if (!qid_valid(qid))
207 return -EINVAL;
208
209 error = gfs2_quota_refresh(sdp, qid);
202 return error ? error : len; 210 return error ? error : len;
203} 211}
204 212
@@ -213,7 +221,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
213 int rv; 221 int rv;
214 222
215 if (!capable(CAP_SYS_ADMIN)) 223 if (!capable(CAP_SYS_ADMIN))
216 return -EACCES; 224 return -EPERM;
217 225
218 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum, 226 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
219 mode); 227 mode);
@@ -332,6 +340,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
332 return ret; 340 return ret;
333} 341}
334 342
343static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
344{
345 int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
346
347 return sprintf(buf, "%d\n", val);
348}
349
350static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
351{
352 ssize_t ret = len;
353 int val;
354
355 val = simple_strtol(buf, NULL, 0);
356
357 if ((val == 1) &&
358 !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
359 complete(&sdp->sd_wdack);
360 else
361 ret = -EINVAL;
362 return ret;
363}
364
335static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 365static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
336{ 366{
337 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 367 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -463,7 +493,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
463 493
464GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 494GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
465GDLM_ATTR(block, 0644, block_show, block_store); 495GDLM_ATTR(block, 0644, block_show, block_store);
466GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 496GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
467GDLM_ATTR(jid, 0644, jid_show, jid_store); 497GDLM_ATTR(jid, 0644, jid_show, jid_store);
468GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 498GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
469GDLM_ATTR(first_done, 0444, first_done_show, NULL); 499GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -502,7 +532,7 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
502 unsigned int x, y; 532 unsigned int x, y;
503 533
504 if (!capable(CAP_SYS_ADMIN)) 534 if (!capable(CAP_SYS_ADMIN))
505 return -EACCES; 535 return -EPERM;
506 536
507 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y) 537 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
508 return -EINVAL; 538 return -EINVAL;
@@ -521,7 +551,7 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
521 unsigned int x; 551 unsigned int x;
522 552
523 if (!capable(CAP_SYS_ADMIN)) 553 if (!capable(CAP_SYS_ADMIN))
524 return -EACCES; 554 return -EPERM;
525 555
526 x = simple_strtoul(buf, NULL, 0); 556 x = simple_strtoul(buf, NULL, 0);
527 557
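
The sys.c changes route the freeze file through the VFS (freeze_super()/thaw_super()) now that gfs2_freeze_fs()/gfs2_unfreeze_fs() are gone, change the permission failure from -EACCES to -EPERM, and validate quota-refresh ids with make_kqid()/qid_valid() before calling gfs2_quota_refresh(). Below is a small userspace sketch of the store-handler shape shown in freeze_store(): parse a small integer, dispatch, and return either the consumed length or a negative errno. The callbacks are fakes.

/* Shape of a sysfs-style "store" handler as in freeze_store() above.
 * Userspace sketch with fake freeze/thaw callbacks. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int do_freeze(void) { puts("freeze"); return 0; }
static int do_thaw(void)   { puts("thaw");   return 0; }

static long freeze_store(const char *buf, size_t len, int is_admin)
{
	int error;
	long n = strtol(buf, NULL, 0);

	if (!is_admin)
		return -EPERM;           /* was -EACCES before this change */

	switch (n) {
	case 0:
		error = do_thaw();
		break;
	case 1:
		error = do_freeze();
		break;
	default:
		return -EINVAL;
	}
	return error ? error : (long)len;
}

int main(void)
{
	printf("%ld\n", freeze_store("1\n", 2, 1));  /* 2: bytes consumed */
	printf("%ld\n", freeze_store("7\n", 2, 1));  /* negative: -EINVAL */
	return 0;
}
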
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 413627072f36..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -18,6 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "glock.h" 20#include "glock.h"
21#include "inode.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
23#include "meta_io.h" 24#include "meta_io.h"
@@ -142,44 +143,143 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
142 sb_end_intwrite(sdp->sd_vfs); 143 sb_end_intwrite(sdp->sd_vfs);
143} 144}
144 145
146static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
147 struct buffer_head *bh,
148 const struct gfs2_log_operations *lops)
149{
150 struct gfs2_bufdata *bd;
151
152 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
153 bd->bd_bh = bh;
154 bd->bd_gl = gl;
155 bd->bd_ops = lops;
156 INIT_LIST_HEAD(&bd->bd_list);
157 bh->b_private = bd;
158 return bd;
159}
160
145/** 161/**
146 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 162 * gfs2_trans_add_data - Add a databuf to the transaction.
147 * @gl: the glock the buffer belongs to 163 * @gl: The inode glock associated with the buffer
148 * @bh: The buffer to add 164 * @bh: The buffer to add
149 * @meta: True in the case of adding metadata
150 * 165 *
166 * This is used in two distinct cases:
167 * i) In ordered write mode
 168 * We put the data buffer on a list so that we can ensure that it's
169 * synced to disk at the right time
170 * ii) In journaled data mode
171 * We need to journal the data block in the same way as metadata in
172 * the functions above. The difference is that here we have a tag
173 * which is two __be64's being the block number (as per meta data)
174 * and a flag which says whether the data block needs escaping or
175 * not. This means we need a new log entry for each 251 or so data
176 * blocks, which isn't an enormous overhead but twice as much as
177 * for normal metadata blocks.
151 */ 178 */
179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
180{
181 struct gfs2_trans *tr = current->journal_info;
182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
152 186
153void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) 187 if (!gfs2_is_jdata(ip)) {
188 gfs2_ordered_add_inode(ip);
189 return;
190 }
191
192 lock_buffer(bh);
193 gfs2_log_lock(sdp);
194 bd = bh->b_private;
195 if (bd == NULL) {
196 gfs2_log_unlock(sdp);
197 unlock_buffer(bh);
198 if (bh->b_private == NULL)
199 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
200 lock_buffer(bh);
201 gfs2_log_lock(sdp);
202 }
203 gfs2_assert(sdp, bd->bd_gl == gl);
204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
213 gfs2_log_unlock(sdp);
214 unlock_buffer(bh);
215}
216
217static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
154{ 218{
219 struct gfs2_meta_header *mh;
220 struct gfs2_trans *tr;
221
222 tr = current->journal_info;
223 tr->tr_touched = 1;
224 if (!list_empty(&bd->bd_list))
225 return;
226 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
227 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
228 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
229 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
230 printk(KERN_ERR
231 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
232 (unsigned long long)bd->bd_bh->b_blocknr);
233 BUG();
234 }
235 gfs2_pin(sdp, bd->bd_bh);
236 mh->__pad0 = cpu_to_be64(0);
237 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
238 sdp->sd_log_num_buf++;
239 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
240 tr->tr_num_buf_new++;
241}
242
243void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
244{
245
155 struct gfs2_sbd *sdp = gl->gl_sbd; 246 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 247 struct gfs2_bufdata *bd;
157 248
158 lock_buffer(bh); 249 lock_buffer(bh);
159 gfs2_log_lock(sdp); 250 gfs2_log_lock(sdp);
160 bd = bh->b_private; 251 bd = bh->b_private;
161 if (bd) 252 if (bd == NULL) {
162 gfs2_assert(sdp, bd->bd_gl == gl);
163 else {
164 gfs2_log_unlock(sdp); 253 gfs2_log_unlock(sdp);
165 unlock_buffer(bh); 254 unlock_buffer(bh);
166 gfs2_attach_bufdata(gl, bh, meta); 255 lock_page(bh->b_page);
167 bd = bh->b_private; 256 if (bh->b_private == NULL)
257 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
258 unlock_page(bh->b_page);
168 lock_buffer(bh); 259 lock_buffer(bh);
169 gfs2_log_lock(sdp); 260 gfs2_log_lock(sdp);
170 } 261 }
171 lops_add(sdp, bd); 262 gfs2_assert(sdp, bd->bd_gl == gl);
263 meta_lo_add(sdp, bd);
172 gfs2_log_unlock(sdp); 264 gfs2_log_unlock(sdp);
173 unlock_buffer(bh); 265 unlock_buffer(bh);
174} 266}
175 267
176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 268void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
177{ 269{
270 struct gfs2_glock *gl = bd->bd_gl;
271 struct gfs2_trans *tr = current->journal_info;
272
178 BUG_ON(!list_empty(&bd->bd_list)); 273 BUG_ON(!list_empty(&bd->bd_list));
179 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 274 BUG_ON(!list_empty(&bd->bd_ail_st_list));
180 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 275 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
181 lops_init_le(bd, &gfs2_revoke_lops); 276 bd->bd_ops = &gfs2_revoke_lops;
182 lops_add(sdp, bd); 277 tr->tr_touched = 1;
278 tr->tr_num_revoke++;
279 sdp->sd_log_num_revoke++;
280 atomic_inc(&gl->gl_revokes);
281 set_bit(GLF_LFLUSH, &gl->gl_flags);
282 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
183} 283}
184 284
185void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 285void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
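The trans.c hunk above retires gfs2_trans_add_bh(gl, bh, meta), which picked its behaviour from a boolean flag, in favour of two explicit entry points that share the new gfs2_alloc_bufdata() helper. As a rough user-space sketch of that refactoring pattern (all types and names below are invented stand-ins, not the kernel API), the split looks like this:

#include <stdio.h>
#include <stdlib.h>

struct buf { int blocknr; void *priv; };
struct ops { const char *name; };

static const struct ops data_ops = { "data" };
static const struct ops meta_ops = { "meta" };

/* Shared helper, analogous to gfs2_alloc_bufdata(): attach tracking
 * state to the buffer exactly once.  (The kernel helper allocates with
 * __GFP_NOFAIL and so cannot fail; here we just bail out.) */
static void alloc_bufdata(struct buf *b, const struct ops *o)
{
        const struct ops **bd = malloc(sizeof(*bd));

        if (!bd)
                exit(1);
        *bd = o;
        b->priv = bd;
}

/* Two explicit entry points instead of one function with a flag. */
static void trans_add_data(struct buf *b)
{
        if (!b->priv)
                alloc_bufdata(b, &data_ops);
        printf("block %d logged as %s\n", b->blocknr,
               (*(const struct ops **)b->priv)->name);
}

static void trans_add_meta(struct buf *b)
{
        if (!b->priv)
                alloc_bufdata(b, &meta_ops);
        printf("block %d logged as %s\n", b->blocknr,
               (*(const struct ops **)b->priv)->name);
}

int main(void)
{
        struct buf d = { 10, NULL }, m = { 11, NULL };

        trans_add_data(&d);     /* previously trans_add_bh(gl, bh, 0) */
        trans_add_meta(&m);     /* previously trans_add_bh(gl, bh, 1) */
        free(d.priv);
        free(m.priv);
        return 0;
}

Callers elsewhere in the series change mechanically, as the fs/gfs2/xattr.c hunks further down show: gfs2_trans_add_bh(ip->i_gl, bh, 1) becomes gfs2_trans_add_meta(ip->i_gl, bh).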
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index bf2ae9aeee7a..1e6e7da25a17 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
39 unsigned int revokes); 39 unsigned int revokes);
40 40
41extern void gfs2_trans_end(struct gfs2_sbd *sdp); 41extern void gfs2_trans_end(struct gfs2_sbd *sdp);
42extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 42extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 44extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); 45extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45 46
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5744f6..6402fb69d71b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
54 54
55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
56 56
57 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
58 wait_for_completion(&sdp->sd_wdack);
59
57 if (lm->lm_unmount) { 60 if (lm->lm_unmount) {
58 fs_err(sdp, "telling LM to unmount\n"); 61 fs_err(sdp, "telling LM to unmount\n");
59 lm->lm_unmount(sdp); 62 lm->lm_unmount(sdp);
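The util.c hunk makes a withdrawing node block on the sd_wdack completion, but only when the lock protocol is lock_dlm, presumably so that the dlm control infrastructure can acknowledge the withdraw before the lock module is told to unmount. A loose user-space analogue of that wait-for-acknowledgement pattern, with a pthread condition variable standing in for the kernel completion (all names here are invented for illustration):

#include <pthread.h>
#include <stdio.h>

/* A minimal "completion": a flag protected by a mutex and a condvar. */
struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
};

static void complete(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static struct completion wdack = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void *control_side(void *arg)
{
        (void)arg;
        puts("control side: acknowledging withdraw");
        complete(&wdack);               /* stand-in for the real ack path */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, control_side, NULL);
        puts("fs: withdrawing, waiting for ack");
        wait_for_completion(&wdack);    /* analogue of the new sd_wdack wait */
        puts("fs: ack received, telling LM to unmount");
        pthread_join(t, NULL);
        return 0;
}

Build with cc -pthread; in the kernel the acknowledgement comes from userspace rather than from a sibling thread.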
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 76c144b3c9bb..ecd37f30ab91 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -270,7 +270,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_meta(ip->i_gl, bh);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
@@ -309,7 +309,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_meta(ip->i_gl, dibh);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
@@ -331,7 +331,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
331 if (error) 331 if (error)
332 return error; 332 return error;
333 333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 334 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
335 if (error) 335 if (error)
336 goto out_alloc; 336 goto out_alloc;
337 337
@@ -509,7 +509,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_meta(ip->i_gl, bh[x]);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
@@ -629,7 +629,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_meta(ip->i_gl, *bhp);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
@@ -691,7 +691,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_meta(ip->i_gl, bh);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
@@ -751,7 +751,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
751 error = gfs2_meta_inode_buffer(ip, &dibh); 751 error = gfs2_meta_inode_buffer(ip, &dibh);
752 if (!error) { 752 if (!error) {
753 ip->i_inode.i_ctime = CURRENT_TIME; 753 ip->i_inode.i_ctime = CURRENT_TIME;
754 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 754 gfs2_trans_add_meta(ip->i_gl, dibh);
755 gfs2_dinode_out(ip, dibh->b_data); 755 gfs2_dinode_out(ip, dibh->b_data);
756 brelse(dibh); 756 brelse(dibh);
757 } 757 }
@@ -834,7 +834,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip,
834 struct gfs2_ea_header *prev = el->el_prev; 834 struct gfs2_ea_header *prev = el->el_prev;
835 u32 len; 835 u32 len;
836 836
837 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 837 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
838 838
839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) { 839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
840 ea->ea_type = GFS2_EATYPE_UNUSED; 840 ea->ea_type = GFS2_EATYPE_UNUSED;
@@ -872,7 +872,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
872 if (error) 872 if (error)
873 return error; 873 return error;
874 874
875 gfs2_trans_add_bh(ip->i_gl, bh, 1); 875 gfs2_trans_add_meta(ip->i_gl, bh);
876 876
877 if (es->ea_split) 877 if (es->ea_split)
878 ea = ea_split_ea(ea); 878 ea = ea_split_ea(ea);
@@ -886,7 +886,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
886 if (error) 886 if (error)
887 goto out; 887 goto out;
888 ip->i_inode.i_ctime = CURRENT_TIME; 888 ip->i_inode.i_ctime = CURRENT_TIME;
889 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 889 gfs2_trans_add_meta(ip->i_gl, dibh);
890 gfs2_dinode_out(ip, dibh->b_data); 890 gfs2_dinode_out(ip, dibh->b_data);
891 brelse(dibh); 891 brelse(dibh);
892out: 892out:
@@ -901,7 +901,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip,
901 struct gfs2_ea_header *ea = es->es_ea; 901 struct gfs2_ea_header *ea = es->es_ea;
902 int error; 902 int error;
903 903
904 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); 904 gfs2_trans_add_meta(ip->i_gl, es->es_bh);
905 905
906 if (es->ea_split) 906 if (es->ea_split)
907 ea = ea_split_ea(ea); 907 ea = ea_split_ea(ea);
@@ -997,7 +997,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
997 goto out; 997 goto out;
998 } 998 }
999 999
1000 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1000 gfs2_trans_add_meta(ip->i_gl, indbh);
1001 } else { 1001 } else {
1002 u64 blk; 1002 u64 blk;
1003 unsigned int n = 1; 1003 unsigned int n = 1;
@@ -1006,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1006 return error; 1006 return error;
1007 gfs2_trans_add_unrevoke(sdp, blk, 1); 1007 gfs2_trans_add_unrevoke(sdp, blk, 1);
1008 indbh = gfs2_meta_new(ip->i_gl, blk); 1008 indbh = gfs2_meta_new(ip->i_gl, blk);
1009 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1009 gfs2_trans_add_meta(ip->i_gl, indbh);
1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1011 gfs2_buffer_clear_tail(indbh, mh_size); 1011 gfs2_buffer_clear_tail(indbh, mh_size);
1012 1012
@@ -1092,7 +1092,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1092 if (error) 1092 if (error)
1093 return error; 1093 return error;
1094 1094
1095 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1095 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
1096 1096
1097 if (prev) { 1097 if (prev) {
1098 u32 len; 1098 u32 len;
@@ -1109,7 +1109,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1109 error = gfs2_meta_inode_buffer(ip, &dibh); 1109 error = gfs2_meta_inode_buffer(ip, &dibh);
1110 if (!error) { 1110 if (!error) {
1111 ip->i_inode.i_ctime = CURRENT_TIME; 1111 ip->i_inode.i_ctime = CURRENT_TIME;
1112 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1112 gfs2_trans_add_meta(ip->i_gl, dibh);
1113 gfs2_dinode_out(ip, dibh->b_data); 1113 gfs2_dinode_out(ip, dibh->b_data);
1114 brelse(dibh); 1114 brelse(dibh);
1115 } 1115 }
@@ -1265,7 +1265,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); 1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1267 if (error == 0) { 1267 if (error == 0) {
1268 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1268 gfs2_trans_add_meta(ip->i_gl, el.el_bh);
1269 memcpy(GFS2_EA2DATA(el.el_ea), data, 1269 memcpy(GFS2_EA2DATA(el.el_ea), data,
1270 GFS2_EA_DATA_LEN(el.el_ea)); 1270 GFS2_EA_DATA_LEN(el.el_ea));
1271 } 1271 }
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 if (error) 1352 if (error)
1353 goto out_gunlock; 1353 goto out_gunlock;
1354 1354
1355 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1355 gfs2_trans_add_meta(ip->i_gl, indbh);
1356 1356
1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); 1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1358 bstart = 0; 1358 bstart = 0;
@@ -1384,7 +1384,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1384 1384
1385 error = gfs2_meta_inode_buffer(ip, &dibh); 1385 error = gfs2_meta_inode_buffer(ip, &dibh);
1386 if (!error) { 1386 if (!error) {
1387 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1387 gfs2_trans_add_meta(ip->i_gl, dibh);
1388 gfs2_dinode_out(ip, dibh->b_data); 1388 gfs2_dinode_out(ip, dibh->b_data);
1389 brelse(dibh); 1389 brelse(dibh);
1390 } 1390 }
@@ -1434,7 +1434,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1434 1434
1435 error = gfs2_meta_inode_buffer(ip, &dibh); 1435 error = gfs2_meta_inode_buffer(ip, &dibh);
1436 if (!error) { 1436 if (!error) {
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1437 gfs2_trans_add_meta(ip->i_gl, dibh);
1438 gfs2_dinode_out(ip, dibh->b_data); 1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh); 1439 brelse(dibh);
1440 } 1440 }
@@ -1461,7 +1461,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1461 if (error) 1461 if (error)
1462 return error; 1462 return error;
1463 1463
1464 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1464 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1465 if (error) 1465 if (error)
1466 return error; 1466 return error;
1467 1467
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index b77c5bc20f8a..998e3a6decf3 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -1,6 +1,6 @@
1config HFS_FS 1config HFS_FS
2 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 2 tristate "Apple Macintosh file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 If you say Y here, you will be able to mount Macintosh-formatted 6 If you say Y here, you will be able to mount Macintosh-formatted
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 422dde2ec0a1..5f7f1abd5f6d 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -51,7 +51,7 @@ done:
51 */ 51 */
52static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 52static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
53{ 53{
54 struct inode *inode = filp->f_path.dentry->d_inode; 54 struct inode *inode = file_inode(filp);
55 struct super_block *sb = inode->i_sb; 55 struct super_block *sb = inode->i_sb;
56 int len, err; 56 int len, err;
57 char strbuf[HFS_MAX_NAMELEN]; 57 char strbuf[HFS_MAX_NAMELEN];
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d47f11658c17..3031dfdd2358 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -128,7 +128,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
128{ 128{
129 struct file *file = iocb->ki_filp; 129 struct file *file = iocb->ki_filp;
130 struct address_space *mapping = file->f_mapping; 130 struct address_space *mapping = file->f_mapping;
131 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 131 struct inode *inode = file_inode(file)->i_mapping->host;
132 ssize_t ret; 132 ssize_t ret;
133 133
134 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 134 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
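Both hfs hunks above swap open-coded filp->f_path.dentry->d_inode chains for the file_inode() helper. The value is readability and a single place to change how the inode is reached; a tiny standalone illustration with simplified structures (the real VFS helper may read a cached pointer instead of walking the chain):

#include <stdio.h>

struct inode  { unsigned long i_ino; };
struct dentry { struct inode *d_inode; };
struct path   { struct dentry *dentry; };
struct file   { struct path f_path; };

/* One accessor instead of repeating the dereference chain at every call site. */
static inline struct inode *file_inode(const struct file *f)
{
        return f->f_path.dentry->d_inode;
}

int main(void)
{
        struct inode i = { 42 };
        struct dentry d = { &i };
        struct file f = { { &d } };

        printf("ino=%lu\n", file_inode(&f)->i_ino);
        return 0;
}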
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index 3cc0df730156..09d278bb7b91 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -5,5 +5,5 @@
5obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o 5obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
6 6
7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ 7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o 8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
9 9 attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
new file mode 100644
index 000000000000..8d691f124714
--- /dev/null
+++ b/fs/hfsplus/attributes.c
@@ -0,0 +1,399 @@
1/*
2 * linux/fs/hfsplus/attributes.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handling of records in attributes tree
7 */
8
9#include "hfsplus_fs.h"
10#include "hfsplus_raw.h"
11
12static struct kmem_cache *hfsplus_attr_tree_cachep;
13
14int hfsplus_create_attr_tree_cache(void)
15{
16 if (hfsplus_attr_tree_cachep)
17 return -EEXIST;
18
19 hfsplus_attr_tree_cachep =
20 kmem_cache_create("hfsplus_attr_cache",
21 sizeof(hfsplus_attr_entry), 0,
22 SLAB_HWCACHE_ALIGN, NULL);
23 if (!hfsplus_attr_tree_cachep)
24 return -ENOMEM;
25
26 return 0;
27}
28
29void hfsplus_destroy_attr_tree_cache(void)
30{
31 kmem_cache_destroy(hfsplus_attr_tree_cachep);
32}
33
34int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1,
35 const hfsplus_btree_key *k2)
36{
37 __be32 k1_cnid, k2_cnid;
38
39 k1_cnid = k1->attr.cnid;
40 k2_cnid = k2->attr.cnid;
41 if (k1_cnid != k2_cnid)
42 return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1;
43
44 return hfsplus_strcmp(
45 (const struct hfsplus_unistr *)&k1->attr.key_name,
46 (const struct hfsplus_unistr *)&k2->attr.key_name);
47}
48
49int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
50 u32 cnid, const char *name)
51{
52 int len;
53
54 memset(key, 0, sizeof(struct hfsplus_attr_key));
55 key->attr.cnid = cpu_to_be32(cnid);
56 if (name) {
57 len = strlen(name);
58 if (len > HFSPLUS_ATTR_MAX_STRLEN) {
59 printk(KERN_ERR "hfs: invalid xattr name's length\n");
60 return -EINVAL;
61 }
62 hfsplus_asc2uni(sb,
63 (struct hfsplus_unistr *)&key->attr.key_name,
64 HFSPLUS_ATTR_MAX_STRLEN, name, len);
65 len = be16_to_cpu(key->attr.key_name.length);
66 } else {
67 key->attr.key_name.length = 0;
68 len = 0;
69 }
70
71 /* The length of the key, as stored in key_len field, does not include
72 * the size of the key_len field itself.
73 * So, offsetof(hfsplus_attr_key, key_name) is a trick because
74 * it takes into consideration key_len field (__be16) of
75 * hfsplus_attr_key structure instead of length field (__be16) of
76 * hfsplus_attr_unistr structure.
77 */
78 key->key_len =
79 cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
80 2 * len);
81
82 return 0;
83}
84
85void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
86 u32 cnid,
87 struct hfsplus_attr_unistr *name)
88{
89 int ustrlen;
90
91 memset(key, 0, sizeof(struct hfsplus_attr_key));
92 ustrlen = be16_to_cpu(name->length);
93 key->attr.cnid = cpu_to_be32(cnid);
94 key->attr.key_name.length = cpu_to_be16(ustrlen);
95 ustrlen *= 2;
96 memcpy(key->attr.key_name.unicode, name->unicode, ustrlen);
97
98 /* The length of the key, as stored in key_len field, does not include
99 * the size of the key_len field itself.
100 * So, offsetof(hfsplus_attr_key, key_name) is a trick because
101 * it takes into consideration key_len field (__be16) of
102 * hfsplus_attr_key structure instead of length field (__be16) of
103 * hfsplus_attr_unistr structure.
104 */
105 key->key_len =
106 cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
107 ustrlen);
108}
109
110hfsplus_attr_entry *hfsplus_alloc_attr_entry(void)
111{
112 return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL);
113}
114
115void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry)
116{
117 if (entry)
118 kmem_cache_free(hfsplus_attr_tree_cachep, entry);
119}
120
121#define HFSPLUS_INVALID_ATTR_RECORD -1
122
123static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type,
124 u32 cnid, const void *value, size_t size)
125{
126 if (record_type == HFSPLUS_ATTR_FORK_DATA) {
127 /*
128 * Mac OS X supports only inline data attributes.
129 * Do nothing
130 */
131 memset(entry, 0, sizeof(*entry));
132 return sizeof(struct hfsplus_attr_fork_data);
133 } else if (record_type == HFSPLUS_ATTR_EXTENTS) {
134 /*
135 * Mac OS X supports only inline data attributes.
136 * Do nothing.
137 */
138 memset(entry, 0, sizeof(*entry));
139 return sizeof(struct hfsplus_attr_extents);
140 } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
141 u16 len;
142
143 memset(entry, 0, sizeof(struct hfsplus_attr_inline_data));
144 entry->inline_data.record_type = cpu_to_be32(record_type);
145 if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE)
146 len = size;
147 else
148 return HFSPLUS_INVALID_ATTR_RECORD;
149 entry->inline_data.length = cpu_to_be16(len);
150 memcpy(entry->inline_data.raw_bytes, value, len);
151 /*
152 * Align len on two-byte boundary.
 153 * A pad byte is added when len is odd.
154 */
155 len = round_up(len, 2);
156 return offsetof(struct hfsplus_attr_inline_data, raw_bytes) +
157 len;
158 } else /* invalid input */
159 memset(entry, 0, sizeof(*entry));
160
161 return HFSPLUS_INVALID_ATTR_RECORD;
162}
163
164int hfsplus_find_attr(struct super_block *sb, u32 cnid,
165 const char *name, struct hfs_find_data *fd)
166{
167 int err = 0;
168
169 dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid);
170
171 if (!HFSPLUS_SB(sb)->attr_tree) {
172 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
173 return -EINVAL;
174 }
175
176 if (name) {
177 err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name);
178 if (err)
179 goto failed_find_attr;
180 err = hfs_brec_find(fd, hfs_find_rec_by_key);
181 if (err)
182 goto failed_find_attr;
183 } else {
184 err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL);
185 if (err)
186 goto failed_find_attr;
187 err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid);
188 if (err)
189 goto failed_find_attr;
190 }
191
192failed_find_attr:
193 return err;
194}
195
196int hfsplus_attr_exists(struct inode *inode, const char *name)
197{
198 int err = 0;
199 struct super_block *sb = inode->i_sb;
200 struct hfs_find_data fd;
201
202 if (!HFSPLUS_SB(sb)->attr_tree)
203 return 0;
204
205 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
206 if (err)
207 return 0;
208
209 err = hfsplus_find_attr(sb, inode->i_ino, name, &fd);
210 if (err)
211 goto attr_not_found;
212
213 hfs_find_exit(&fd);
214 return 1;
215
216attr_not_found:
217 hfs_find_exit(&fd);
218 return 0;
219}
220
221int hfsplus_create_attr(struct inode *inode,
222 const char *name,
223 const void *value, size_t size)
224{
225 struct super_block *sb = inode->i_sb;
226 struct hfs_find_data fd;
227 hfsplus_attr_entry *entry_ptr;
228 int entry_size;
229 int err;
230
231 dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n",
232 name ? name : NULL, inode->i_ino);
233
234 if (!HFSPLUS_SB(sb)->attr_tree) {
235 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
236 return -EINVAL;
237 }
238
239 entry_ptr = hfsplus_alloc_attr_entry();
240 if (!entry_ptr)
241 return -ENOMEM;
242
243 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
244 if (err)
245 goto failed_init_create_attr;
246
247 if (name) {
248 err = hfsplus_attr_build_key(sb, fd.search_key,
249 inode->i_ino, name);
250 if (err)
251 goto failed_create_attr;
252 } else {
253 err = -EINVAL;
254 goto failed_create_attr;
255 }
256
257 /* Mac OS X supports only inline data attributes. */
258 entry_size = hfsplus_attr_build_record(entry_ptr,
259 HFSPLUS_ATTR_INLINE_DATA,
260 inode->i_ino,
261 value, size);
262 if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) {
263 err = -EINVAL;
264 goto failed_create_attr;
265 }
266
267 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
268 if (err != -ENOENT) {
269 if (!err)
270 err = -EEXIST;
271 goto failed_create_attr;
272 }
273
274 err = hfs_brec_insert(&fd, entry_ptr, entry_size);
275 if (err)
276 goto failed_create_attr;
277
278 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
279
280failed_create_attr:
281 hfs_find_exit(&fd);
282
283failed_init_create_attr:
284 hfsplus_destroy_attr_entry(entry_ptr);
285 return err;
286}
287
288static int __hfsplus_delete_attr(struct inode *inode, u32 cnid,
289 struct hfs_find_data *fd)
290{
291 int err = 0;
292 __be32 found_cnid, record_type;
293
294 hfs_bnode_read(fd->bnode, &found_cnid,
295 fd->keyoffset +
296 offsetof(struct hfsplus_attr_key, cnid),
297 sizeof(__be32));
298 if (cnid != be32_to_cpu(found_cnid))
299 return -ENOENT;
300
301 hfs_bnode_read(fd->bnode, &record_type,
302 fd->entryoffset, sizeof(record_type));
303
304 switch (be32_to_cpu(record_type)) {
305 case HFSPLUS_ATTR_INLINE_DATA:
306 /* All is OK. Do nothing. */
307 break;
308 case HFSPLUS_ATTR_FORK_DATA:
309 case HFSPLUS_ATTR_EXTENTS:
 310 printk(KERN_ERR "hfs: only inline data xattrs are supported\n");
311 return -EOPNOTSUPP;
312 default:
313 printk(KERN_ERR "hfs: invalid extended attribute record\n");
314 return -ENOENT;
315 }
316
317 err = hfs_brec_remove(fd);
318 if (err)
319 return err;
320
321 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
322 return err;
323}
324
325int hfsplus_delete_attr(struct inode *inode, const char *name)
326{
327 int err = 0;
328 struct super_block *sb = inode->i_sb;
329 struct hfs_find_data fd;
330
331 dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n",
332 name ? name : NULL, inode->i_ino);
333
334 if (!HFSPLUS_SB(sb)->attr_tree) {
335 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
336 return -EINVAL;
337 }
338
339 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
340 if (err)
341 return err;
342
343 if (name) {
344 err = hfsplus_attr_build_key(sb, fd.search_key,
345 inode->i_ino, name);
346 if (err)
347 goto out;
348 } else {
349 printk(KERN_ERR "hfs: invalid extended attribute name\n");
350 err = -EINVAL;
351 goto out;
352 }
353
354 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
355 if (err)
356 goto out;
357
358 err = __hfsplus_delete_attr(inode, inode->i_ino, &fd);
359 if (err)
360 goto out;
361
362out:
363 hfs_find_exit(&fd);
364 return err;
365}
366
367int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid)
368{
369 int err = 0;
370 struct hfs_find_data fd;
371
372 dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid);
373
374 if (!HFSPLUS_SB(dir->i_sb)->attr_tree) {
375 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
376 return -EINVAL;
377 }
378
379 err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd);
380 if (err)
381 return err;
382
383 for (;;) {
384 err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd);
385 if (err) {
386 if (err != -ENOENT)
387 printk(KERN_ERR "hfs: xattr search failed.\n");
388 goto end_delete_all;
389 }
390
391 err = __hfsplus_delete_attr(dir, cnid, &fd);
392 if (err)
393 goto end_delete_all;
394 }
395
396end_delete_all:
397 hfs_find_exit(&fd);
398 return err;
399}
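hfsplus_attr_build_record() above sizes an inline-data record as the offset of the raw bytes plus the value length rounded up to a two-byte boundary, and refuses values larger than the inline limit. A standalone sketch of just that size calculation (the structure and the limit below are simplified stand-ins, not the real on-disk layout):

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>

#define MAX_INLINE_DATA_SIZE 3802       /* illustrative limit only */
#define INVALID_ATTR_RECORD  -1

/* Simplified analogue of struct hfsplus_attr_inline_data. */
struct inline_data {
        uint32_t record_type;
        uint16_t length;
        uint8_t  raw_bytes[MAX_INLINE_DATA_SIZE];
};

#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

static int build_inline_record(struct inline_data *rec,
                               const void *value, size_t size)
{
        uint16_t len;

        if (size > MAX_INLINE_DATA_SIZE)
                return INVALID_ATTR_RECORD;
        len = (uint16_t)size;

        memset(rec, 0, sizeof(*rec));
        rec->length = len;
        memcpy(rec->raw_bytes, value, len);

        /* Odd-length values get one pad byte so records stay 2-byte aligned. */
        len = round_up(len, 2);
        return (int)(offsetof(struct inline_data, raw_bytes) + len);
}

int main(void)
{
        struct inline_data rec;

        printf("record size for a 3-byte value: %d\n",
               build_inline_record(&rec, "abc", 3));
        return 0;
}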
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index 5d799c13205f..d73c98d1ee99 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
24 fd->key = ptr + tree->max_key_len + 2; 24 fd->key = ptr + tree->max_key_len + 2;
25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", 25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n",
26 tree->cnid, __builtin_return_address(0)); 26 tree->cnid, __builtin_return_address(0));
27 mutex_lock(&tree->tree_lock); 27 switch (tree->cnid) {
28 case HFSPLUS_CAT_CNID:
29 mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
30 break;
31 case HFSPLUS_EXT_CNID:
32 mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
33 break;
34 case HFSPLUS_ATTR_CNID:
35 mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
36 break;
37 default:
38 BUG();
39 }
28 return 0; 40 return 0;
29} 41}
30 42
@@ -38,15 +50,73 @@ void hfs_find_exit(struct hfs_find_data *fd)
38 fd->tree = NULL; 50 fd->tree = NULL;
39} 51}
40 52
41/* Find the record in bnode that best matches key (not greater than...)*/ 53int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode,
42int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) 54 struct hfs_find_data *fd,
55 int *begin,
56 int *end,
57 int *cur_rec)
58{
59 __be32 cur_cnid, search_cnid;
60
61 if (bnode->tree->cnid == HFSPLUS_EXT_CNID) {
62 cur_cnid = fd->key->ext.cnid;
63 search_cnid = fd->search_key->ext.cnid;
64 } else if (bnode->tree->cnid == HFSPLUS_CAT_CNID) {
65 cur_cnid = fd->key->cat.parent;
66 search_cnid = fd->search_key->cat.parent;
67 } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) {
68 cur_cnid = fd->key->attr.cnid;
69 search_cnid = fd->search_key->attr.cnid;
70 } else
71 BUG();
72
73 if (cur_cnid == search_cnid) {
74 (*end) = (*cur_rec);
75 if ((*begin) == (*end))
76 return 1;
77 } else {
78 if (be32_to_cpu(cur_cnid) < be32_to_cpu(search_cnid))
79 (*begin) = (*cur_rec) + 1;
80 else
81 (*end) = (*cur_rec) - 1;
82 }
83
84 return 0;
85}
86
87int hfs_find_rec_by_key(struct hfs_bnode *bnode,
88 struct hfs_find_data *fd,
89 int *begin,
90 int *end,
91 int *cur_rec)
43{ 92{
44 int cmpval; 93 int cmpval;
94
95 cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
96 if (!cmpval) {
97 (*end) = (*cur_rec);
98 return 1;
99 }
100 if (cmpval < 0)
101 (*begin) = (*cur_rec) + 1;
102 else
103 *(end) = (*cur_rec) - 1;
104
105 return 0;
106}
107
108/* Find the record in bnode that best matches key (not greater than...)*/
109int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
110 search_strategy_t rec_found)
111{
45 u16 off, len, keylen; 112 u16 off, len, keylen;
46 int rec; 113 int rec;
47 int b, e; 114 int b, e;
48 int res; 115 int res;
49 116
117 if (!rec_found)
118 BUG();
119
50 b = 0; 120 b = 0;
51 e = bnode->num_recs - 1; 121 e = bnode->num_recs - 1;
52 res = -ENOENT; 122 res = -ENOENT;
@@ -59,17 +129,12 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
59 goto fail; 129 goto fail;
60 } 130 }
61 hfs_bnode_read(bnode, fd->key, off, keylen); 131 hfs_bnode_read(bnode, fd->key, off, keylen);
62 cmpval = bnode->tree->keycmp(fd->key, fd->search_key); 132 if (rec_found(bnode, fd, &b, &e, &rec)) {
63 if (!cmpval) {
64 e = rec;
65 res = 0; 133 res = 0;
66 goto done; 134 goto done;
67 } 135 }
68 if (cmpval < 0)
69 b = rec + 1;
70 else
71 e = rec - 1;
72 } while (b <= e); 136 } while (b <= e);
137
73 if (rec != e && e >= 0) { 138 if (rec != e && e >= 0) {
74 len = hfs_brec_lenoff(bnode, e, &off); 139 len = hfs_brec_lenoff(bnode, e, &off);
75 keylen = hfs_brec_keylen(bnode, e); 140 keylen = hfs_brec_keylen(bnode, e);
@@ -79,19 +144,21 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
79 } 144 }
80 hfs_bnode_read(bnode, fd->key, off, keylen); 145 hfs_bnode_read(bnode, fd->key, off, keylen);
81 } 146 }
147
82done: 148done:
83 fd->record = e; 149 fd->record = e;
84 fd->keyoffset = off; 150 fd->keyoffset = off;
85 fd->keylength = keylen; 151 fd->keylength = keylen;
86 fd->entryoffset = off + keylen; 152 fd->entryoffset = off + keylen;
87 fd->entrylength = len - keylen; 153 fd->entrylength = len - keylen;
154
88fail: 155fail:
89 return res; 156 return res;
90} 157}
91 158
92/* Traverse a B*Tree from the root to a leaf finding best fit to key */ 159/* Traverse a B*Tree from the root to a leaf finding best fit to key */
93/* Return allocated copy of node found, set recnum to best record */ 160/* Return allocated copy of node found, set recnum to best record */
94int hfs_brec_find(struct hfs_find_data *fd) 161int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare)
95{ 162{
96 struct hfs_btree *tree; 163 struct hfs_btree *tree;
97 struct hfs_bnode *bnode; 164 struct hfs_bnode *bnode;
@@ -122,7 +189,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
122 goto invalid; 189 goto invalid;
123 bnode->parent = parent; 190 bnode->parent = parent;
124 191
125 res = __hfs_brec_find(bnode, fd); 192 res = __hfs_brec_find(bnode, fd, do_key_compare);
126 if (!height) 193 if (!height)
127 break; 194 break;
128 if (fd->record < 0) 195 if (fd->record < 0)
@@ -149,7 +216,7 @@ int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len)
149{ 216{
150 int res; 217 int res;
151 218
152 res = hfs_brec_find(fd); 219 res = hfs_brec_find(fd, hfs_find_rec_by_key);
153 if (res) 220 if (res)
154 return res; 221 return res;
155 if (fd->entrylength > rec_len) 222 if (fd->entrylength > rec_len)
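The bfind.c change above splits the comparison step out of __hfs_brec_find() into a search_strategy_t callback: the strategy returns 1 when the current record satisfies the search and otherwise narrows the begin/end bounds, so the same bisection loop can serve both exact key lookups and find-first-record-by-cnid scans. A self-contained sketch of that callback-driven search over a plain sorted array (not the b-tree code itself):

#include <stdio.h>

/* The callback contract mirrors search_strategy_t: return 1 when found,
 * otherwise adjust *begin or *end around *cur and return 0. */
typedef int (*search_strategy_t)(const int *recs, int key,
                                 int *begin, int *end, int *cur);

static int find_rec_by_key(const int *recs, int key,
                           int *begin, int *end, int *cur)
{
        if (recs[*cur] == key) {
                *end = *cur;
                return 1;
        }
        if (recs[*cur] < key)
                *begin = *cur + 1;
        else
                *end = *cur - 1;
        return 0;
}

/* Analogue of __hfs_brec_find(): the loop owns the bisection,
 * the strategy decides what "found" means. */
static int brec_find(const int *recs, int nrecs, int key,
                     search_strategy_t strategy)
{
        int b = 0, e = nrecs - 1, rec = 0;

        do {
                rec = (e + b) / 2;
                if (strategy(recs, key, &b, &e, &rec))
                        return e;       /* index of the matching record */
        } while (b <= e);
        return -1;                      /* -ENOENT in the kernel version */
}

int main(void)
{
        int recs[] = { 2, 3, 5, 8, 13, 21 };

        printf("found 8 at index %d\n", brec_find(recs, 6, 8, find_rec_by_key));
        printf("lookup of 7 returns %d\n", brec_find(recs, 6, 7, find_rec_by_key));
        return 0;
}

A second strategy such as hfs_find_1st_rec_by_cnid can then reuse the same loop by defining "found" differently, which is exactly what the attribute-tree lookups above rely on.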
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 1c42cc5b899f..f31ac6f404f1 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -62,7 +62,8 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
62 62
63 tree = node->tree; 63 tree = node->tree;
64 if (node->type == HFS_NODE_LEAF || 64 if (node->type == HFS_NODE_LEAF ||
65 tree->attributes & HFS_TREE_VARIDXKEYS) 65 tree->attributes & HFS_TREE_VARIDXKEYS ||
66 node->tree->cnid == HFSPLUS_ATTR_CNID)
66 key_len = hfs_bnode_read_u16(node, off) + 2; 67 key_len = hfs_bnode_read_u16(node, off) + 2;
67 else 68 else
68 key_len = tree->max_key_len + 2; 69 key_len = tree->max_key_len + 2;
@@ -314,7 +315,8 @@ void hfs_bnode_dump(struct hfs_bnode *node)
314 if (i && node->type == HFS_NODE_INDEX) { 315 if (i && node->type == HFS_NODE_INDEX) {
315 int tmp; 316 int tmp;
316 317
317 if (node->tree->attributes & HFS_TREE_VARIDXKEYS) 318 if (node->tree->attributes & HFS_TREE_VARIDXKEYS ||
319 node->tree->cnid == HFSPLUS_ATTR_CNID)
318 tmp = hfs_bnode_read_u16(node, key_off) + 2; 320 tmp = hfs_bnode_read_u16(node, key_off) + 2;
319 else 321 else
320 tmp = node->tree->max_key_len + 2; 322 tmp = node->tree->max_key_len + 2;
@@ -646,6 +648,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
646 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 648 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
647 hfs_bnode_unhash(node); 649 hfs_bnode_unhash(node);
648 spin_unlock(&tree->hash_lock); 650 spin_unlock(&tree->hash_lock);
651 hfs_bnode_clear(node, 0,
652 PAGE_CACHE_SIZE * tree->pages_per_bnode);
649 hfs_bmap_free(node); 653 hfs_bmap_free(node);
650 hfs_bnode_free(node); 654 hfs_bnode_free(node);
651 return; 655 return;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2a734cfccc92..298d4e45604b 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -36,7 +36,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
36 return 0; 36 return 0;
37 37
38 if ((node->type == HFS_NODE_INDEX) && 38 if ((node->type == HFS_NODE_INDEX) &&
39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { 39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS) &&
40 (node->tree->cnid != HFSPLUS_ATTR_CNID)) {
40 retval = node->tree->max_key_len + 2; 41 retval = node->tree->max_key_len + 2;
41 } else { 42 } else {
42 recoff = hfs_bnode_read_u16(node, 43 recoff = hfs_bnode_read_u16(node,
@@ -151,12 +152,13 @@ skip:
151 152
152 /* get index key */ 153 /* get index key */
153 hfs_bnode_read_key(new_node, fd->search_key, 14); 154 hfs_bnode_read_key(new_node, fd->search_key, 14);
154 __hfs_brec_find(fd->bnode, fd); 155 __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
155 156
156 hfs_bnode_put(new_node); 157 hfs_bnode_put(new_node);
157 new_node = NULL; 158 new_node = NULL;
158 159
159 if (tree->attributes & HFS_TREE_VARIDXKEYS) 160 if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
161 (tree->cnid == HFSPLUS_ATTR_CNID))
160 key_len = be16_to_cpu(fd->search_key->key_len) + 2; 162 key_len = be16_to_cpu(fd->search_key->key_len) + 2;
161 else { 163 else {
162 fd->search_key->key_len = 164 fd->search_key->key_len =
@@ -201,7 +203,7 @@ again:
201 hfs_bnode_put(node); 203 hfs_bnode_put(node);
202 node = fd->bnode = parent; 204 node = fd->bnode = parent;
203 205
204 __hfs_brec_find(node, fd); 206 __hfs_brec_find(node, fd, hfs_find_rec_by_key);
205 goto again; 207 goto again;
206 } 208 }
207 hfs_bnode_write_u16(node, 209 hfs_bnode_write_u16(node,
@@ -367,12 +369,13 @@ again:
367 parent = hfs_bnode_find(tree, node->parent); 369 parent = hfs_bnode_find(tree, node->parent);
368 if (IS_ERR(parent)) 370 if (IS_ERR(parent))
369 return PTR_ERR(parent); 371 return PTR_ERR(parent);
370 __hfs_brec_find(parent, fd); 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
371 hfs_bnode_dump(parent); 373 hfs_bnode_dump(parent);
372 rec = fd->record; 374 rec = fd->record;
373 375
374 /* size difference between old and new key */ 376 /* size difference between old and new key */
375 if (tree->attributes & HFS_TREE_VARIDXKEYS) 377 if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
378 (tree->cnid == HFSPLUS_ATTR_CNID))
376 newkeylen = hfs_bnode_read_u16(node, 14) + 2; 379 newkeylen = hfs_bnode_read_u16(node, 14) + 2;
377 else 380 else
378 fd->keylength = newkeylen = tree->max_key_len + 2; 381 fd->keylength = newkeylen = tree->max_key_len + 2;
@@ -427,7 +430,7 @@ skip:
427 hfs_bnode_read_key(new_node, fd->search_key, 14); 430 hfs_bnode_read_key(new_node, fd->search_key, 14);
428 cnid = cpu_to_be32(new_node->this); 431 cnid = cpu_to_be32(new_node->this);
429 432
430 __hfs_brec_find(fd->bnode, fd); 433 __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
431 hfs_brec_insert(fd, &cnid, sizeof(cnid)); 434 hfs_brec_insert(fd, &cnid, sizeof(cnid));
432 hfs_bnode_put(fd->bnode); 435 hfs_bnode_put(fd->bnode);
433 hfs_bnode_put(new_node); 436 hfs_bnode_put(new_node);
@@ -495,13 +498,15 @@ static int hfs_btree_inc_height(struct hfs_btree *tree)
495 /* insert old root idx into new root */ 498 /* insert old root idx into new root */
496 node->parent = tree->root; 499 node->parent = tree->root;
497 if (node->type == HFS_NODE_LEAF || 500 if (node->type == HFS_NODE_LEAF ||
498 tree->attributes & HFS_TREE_VARIDXKEYS) 501 tree->attributes & HFS_TREE_VARIDXKEYS ||
502 tree->cnid == HFSPLUS_ATTR_CNID)
499 key_size = hfs_bnode_read_u16(node, 14) + 2; 503 key_size = hfs_bnode_read_u16(node, 14) + 2;
500 else 504 else
501 key_size = tree->max_key_len + 2; 505 key_size = tree->max_key_len + 2;
502 hfs_bnode_copy(new_node, 14, node, 14, key_size); 506 hfs_bnode_copy(new_node, 14, node, 14, key_size);
503 507
504 if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { 508 if (!(tree->attributes & HFS_TREE_VARIDXKEYS) &&
509 (tree->cnid != HFSPLUS_ATTR_CNID)) {
505 key_size = tree->max_key_len + 2; 510 key_size = tree->max_key_len + 2;
506 hfs_bnode_write_u16(new_node, 14, tree->max_key_len); 511 hfs_bnode_write_u16(new_node, 14, tree->max_key_len);
507 } 512 }
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 685d07d0ed18..efb689c21a95 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -98,6 +98,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
98 set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 98 set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
99 } 99 }
100 break; 100 break;
101 case HFSPLUS_ATTR_CNID:
102 if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) {
103 printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n",
104 tree->max_key_len);
105 goto fail_page;
106 }
107 tree->keycmp = hfsplus_attr_bin_cmp_key;
108 break;
101 default: 109 default:
102 printk(KERN_ERR "hfs: unknown B*Tree requested\n"); 110 printk(KERN_ERR "hfs: unknown B*Tree requested\n");
103 goto fail_page; 111 goto fail_page;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 798d9c4c5e71..840d71edd193 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -45,7 +45,8 @@ void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
45 45
46 key->cat.parent = cpu_to_be32(parent); 46 key->cat.parent = cpu_to_be32(parent);
47 if (str) { 47 if (str) {
48 hfsplus_asc2uni(sb, &key->cat.name, str->name, str->len); 48 hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
49 str->name, str->len);
49 len = be16_to_cpu(key->cat.name.length); 50 len = be16_to_cpu(key->cat.name.length);
50 } else { 51 } else {
51 key->cat.name.length = 0; 52 key->cat.name.length = 0;
@@ -167,7 +168,8 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
167 entry->type = cpu_to_be16(type); 168 entry->type = cpu_to_be16(type);
168 entry->thread.reserved = 0; 169 entry->thread.reserved = 0;
169 entry->thread.parentID = cpu_to_be32(parentid); 170 entry->thread.parentID = cpu_to_be32(parentid);
170 hfsplus_asc2uni(sb, &entry->thread.nodeName, str->name, str->len); 171 hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
172 str->name, str->len);
171 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; 173 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
172} 174}
173 175
@@ -198,7 +200,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
198 hfsplus_cat_build_key_uni(fd->search_key, 200 hfsplus_cat_build_key_uni(fd->search_key,
199 be32_to_cpu(tmp.thread.parentID), 201 be32_to_cpu(tmp.thread.parentID),
200 &tmp.thread.nodeName); 202 &tmp.thread.nodeName);
201 return hfs_brec_find(fd); 203 return hfs_brec_find(fd, hfs_find_rec_by_key);
202} 204}
203 205
204int hfsplus_create_cat(u32 cnid, struct inode *dir, 206int hfsplus_create_cat(u32 cnid, struct inode *dir,
@@ -221,7 +223,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
221 S_ISDIR(inode->i_mode) ? 223 S_ISDIR(inode->i_mode) ?
222 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, 224 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
223 dir->i_ino, str); 225 dir->i_ino, str);
224 err = hfs_brec_find(&fd); 226 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
225 if (err != -ENOENT) { 227 if (err != -ENOENT) {
226 if (!err) 228 if (!err)
227 err = -EEXIST; 229 err = -EEXIST;
@@ -233,7 +235,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
233 235
234 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 236 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
235 entry_size = hfsplus_cat_build_record(&entry, cnid, inode); 237 entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
236 err = hfs_brec_find(&fd); 238 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
237 if (err != -ENOENT) { 239 if (err != -ENOENT) {
238 /* panic? */ 240 /* panic? */
239 if (!err) 241 if (!err)
@@ -253,7 +255,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
253 255
254err1: 256err1:
255 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 257 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
256 if (!hfs_brec_find(&fd)) 258 if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
257 hfs_brec_remove(&fd); 259 hfs_brec_remove(&fd);
258err2: 260err2:
259 hfs_find_exit(&fd); 261 hfs_find_exit(&fd);
@@ -279,7 +281,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
279 int len; 281 int len;
280 282
281 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 283 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
282 err = hfs_brec_find(&fd); 284 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
283 if (err) 285 if (err)
284 goto out; 286 goto out;
285 287
@@ -296,7 +298,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
296 } else 298 } else
297 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 299 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
298 300
299 err = hfs_brec_find(&fd); 301 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
300 if (err) 302 if (err)
301 goto out; 303 goto out;
302 304
@@ -326,7 +328,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
326 goto out; 328 goto out;
327 329
328 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 330 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
329 err = hfs_brec_find(&fd); 331 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
330 if (err) 332 if (err)
331 goto out; 333 goto out;
332 334
@@ -337,6 +339,12 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
337 dir->i_size--; 339 dir->i_size--;
338 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 340 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
339 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); 341 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
342
343 if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
344 if (HFSPLUS_SB(sb)->attr_tree)
345 hfsplus_delete_all_attrs(dir, cnid);
346 }
347
340out: 348out:
341 hfs_find_exit(&fd); 349 hfs_find_exit(&fd);
342 350
@@ -363,7 +371,7 @@ int hfsplus_rename_cat(u32 cnid,
363 371
364 /* find the old dir entry and read the data */ 372 /* find the old dir entry and read the data */
365 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 373 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
366 err = hfs_brec_find(&src_fd); 374 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
367 if (err) 375 if (err)
368 goto out; 376 goto out;
369 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { 377 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
@@ -376,7 +384,7 @@ int hfsplus_rename_cat(u32 cnid,
376 384
377 /* create new dir entry with the data from the old entry */ 385 /* create new dir entry with the data from the old entry */
378 hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); 386 hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
379 err = hfs_brec_find(&dst_fd); 387 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
380 if (err != -ENOENT) { 388 if (err != -ENOENT) {
381 if (!err) 389 if (!err)
382 err = -EEXIST; 390 err = -EEXIST;
@@ -391,7 +399,7 @@ int hfsplus_rename_cat(u32 cnid,
391 399
392 /* finally remove the old entry */ 400 /* finally remove the old entry */
393 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 401 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
394 err = hfs_brec_find(&src_fd); 402 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
395 if (err) 403 if (err)
396 goto out; 404 goto out;
397 err = hfs_brec_remove(&src_fd); 405 err = hfs_brec_remove(&src_fd);
@@ -402,7 +410,7 @@ int hfsplus_rename_cat(u32 cnid,
402 410
403 /* remove old thread entry */ 411 /* remove old thread entry */
404 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); 412 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
405 err = hfs_brec_find(&src_fd); 413 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
406 if (err) 414 if (err)
407 goto out; 415 goto out;
408 type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset); 416 type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset);
@@ -414,7 +422,7 @@ int hfsplus_rename_cat(u32 cnid,
414 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); 422 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
415 entry_size = hfsplus_fill_cat_thread(sb, &entry, type, 423 entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
416 dst_dir->i_ino, dst_name); 424 dst_dir->i_ino, dst_name);
417 err = hfs_brec_find(&dst_fd); 425 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
418 if (err != -ENOENT) { 426 if (err != -ENOENT) {
419 if (!err) 427 if (!err)
420 err = -EEXIST; 428 err = -EEXIST;
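With the extra length argument, hfsplus_asc2uni() callers now state how much room the destination Unicode string has: the catalog paths above pass HFSPLUS_MAX_STRLEN while the attribute-tree code passes the shorter HFSPLUS_ATTR_MAX_STRLEN. A minimal standalone version of that explicit-capacity pattern (the conversion below is a naive ASCII widening, not the real one):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MAX_STRLEN      255     /* catalog-style limit */
#define ATTR_MAX_STRLEN 127     /* attribute-style limit */

/* Destination "unistr": length-prefixed array of 16-bit units. */
struct unistr {
        uint16_t length;
        uint16_t unicode[MAX_STRLEN];
};

/* The capacity is an argument instead of being assumed by the callee. */
static int asc2uni(struct unistr *dst, int max_units, const char *src, int len)
{
        int i, n = len;

        if (n > max_units)
                n = max_units;  /* or fail outright, the caller's choice */
        for (i = 0; i < n; i++)
                dst->unicode[i] = (uint8_t)src[i];
        dst->length = (uint16_t)n;
        return n < len ? -1 : 0;
}

int main(void)
{
        struct unistr u;
        const char *name = "com.example.a-rather-long-extended-attribute-name";

        /* Catalog-style call: plenty of room. */
        printf("catalog copy: %d\n",
               asc2uni(&u, MAX_STRLEN, name, (int)strlen(name)));
        /* Attribute-style call with a tiny capacity to show truncation. */
        printf("attr copy:    %d\n",
               asc2uni(&u, 8, name, (int)strlen(name)));
        return 0;
}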
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 6b9f921ef2fa..031c24e50521 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -15,6 +15,7 @@
15 15
16#include "hfsplus_fs.h" 16#include "hfsplus_fs.h"
17#include "hfsplus_raw.h" 17#include "hfsplus_raw.h"
18#include "xattr.h"
18 19
19static inline void hfsplus_instantiate(struct dentry *dentry, 20static inline void hfsplus_instantiate(struct dentry *dentry,
20 struct inode *inode, u32 cnid) 21 struct inode *inode, u32 cnid)
@@ -122,7 +123,7 @@ fail:
122 123
123static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) 124static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
124{ 125{
125 struct inode *inode = filp->f_path.dentry->d_inode; 126 struct inode *inode = file_inode(filp);
126 struct super_block *sb = inode->i_sb; 127 struct super_block *sb = inode->i_sb;
127 int len, err; 128 int len, err;
128 char strbuf[HFSPLUS_MAX_STRLEN + 1]; 129 char strbuf[HFSPLUS_MAX_STRLEN + 1];
@@ -138,7 +139,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
138 if (err) 139 if (err)
139 return err; 140 return err;
140 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); 141 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
141 err = hfs_brec_find(&fd); 142 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
142 if (err) 143 if (err)
143 goto out; 144 goto out;
144 145
@@ -421,6 +422,15 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
421 if (res) 422 if (res)
422 goto out_err; 423 goto out_err;
423 424
425 res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
426 if (res == -EOPNOTSUPP)
427 res = 0; /* Operation is not supported. */
428 else if (res) {
429 /* Try to delete anyway without error analysis. */
430 hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
431 goto out_err;
432 }
433
424 hfsplus_instantiate(dentry, inode, inode->i_ino); 434 hfsplus_instantiate(dentry, inode, inode->i_ino);
425 mark_inode_dirty(inode); 435 mark_inode_dirty(inode);
426 goto out; 436 goto out;
@@ -450,15 +460,26 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
450 init_special_inode(inode, mode, rdev); 460 init_special_inode(inode, mode, rdev);
451 461
452 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); 462 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
453 if (res) { 463 if (res)
454 clear_nlink(inode); 464 goto failed_mknod;
455 hfsplus_delete_inode(inode); 465
456 iput(inode); 466 res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
457 goto out; 467 if (res == -EOPNOTSUPP)
468 res = 0; /* Operation is not supported. */
469 else if (res) {
470 /* Try to delete anyway without error analysis. */
471 hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
472 goto failed_mknod;
458 } 473 }
459 474
460 hfsplus_instantiate(dentry, inode, inode->i_ino); 475 hfsplus_instantiate(dentry, inode, inode->i_ino);
461 mark_inode_dirty(inode); 476 mark_inode_dirty(inode);
477 goto out;
478
479failed_mknod:
480 clear_nlink(inode);
481 hfsplus_delete_inode(inode);
482 iput(inode);
462out: 483out:
463 mutex_unlock(&sbi->vh_mutex); 484 mutex_unlock(&sbi->vh_mutex);
464 return res; 485 return res;
@@ -499,15 +520,19 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
499} 520}
500 521
501const struct inode_operations hfsplus_dir_inode_operations = { 522const struct inode_operations hfsplus_dir_inode_operations = {
502 .lookup = hfsplus_lookup, 523 .lookup = hfsplus_lookup,
503 .create = hfsplus_create, 524 .create = hfsplus_create,
504 .link = hfsplus_link, 525 .link = hfsplus_link,
505 .unlink = hfsplus_unlink, 526 .unlink = hfsplus_unlink,
506 .mkdir = hfsplus_mkdir, 527 .mkdir = hfsplus_mkdir,
507 .rmdir = hfsplus_rmdir, 528 .rmdir = hfsplus_rmdir,
508 .symlink = hfsplus_symlink, 529 .symlink = hfsplus_symlink,
509 .mknod = hfsplus_mknod, 530 .mknod = hfsplus_mknod,
510 .rename = hfsplus_rename, 531 .rename = hfsplus_rename,
532 .setxattr = generic_setxattr,
533 .getxattr = generic_getxattr,
534 .listxattr = hfsplus_listxattr,
535 .removexattr = hfsplus_removexattr,
511}; 536};
512 537
513const struct file_operations hfsplus_dir_operations = { 538const struct file_operations hfsplus_dir_operations = {
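The hfsplus_mknod() rework above routes both failure points, catalog creation and security-xattr initialisation, through one failed_mknod label that tears down the half-created inode. The same goto-cleanup idiom in a runnable sketch (resources and failure injection are made up for illustration):

#include <stdio.h>
#include <stdlib.h>

static int create_record(int fail)  { return fail ? -1 : 0; }
static int init_security(int fail)  { return fail ? -1 : 0; }

/* Mirrors the shape of hfsplus_mknod(): any failure after the object
 * exists falls through a single label that undoes the work. */
static int mknod_like(int fail_create, int fail_security)
{
        void *inode = malloc(64);
        int res;

        if (!inode)
                return -1;

        res = create_record(fail_create);
        if (res)
                goto failed;

        res = init_security(fail_security);
        if (res)
                goto failed;

        printf("created ok\n");
        free(inode);            /* instantiated: hand off / drop our reference */
        return 0;

failed:
        printf("failure, tearing down partially created object\n");
        free(inode);            /* clear_nlink + delete_inode + iput analogue */
        return res;
}

int main(void)
{
        mknod_like(0, 0);
        mknod_like(0, 1);
        return 0;
}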
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index eba76eab6d62..a94f0f779d5e 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -95,7 +95,7 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
95 HFSPLUS_IS_RSRC(inode) ? 95 HFSPLUS_IS_RSRC(inode) ?
96 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); 96 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
97 97
98 res = hfs_brec_find(fd); 98 res = hfs_brec_find(fd, hfs_find_rec_by_key);
99 if (hip->extent_state & HFSPLUS_EXT_NEW) { 99 if (hip->extent_state & HFSPLUS_EXT_NEW) {
100 if (res != -ENOENT) 100 if (res != -ENOENT)
101 return; 101 return;
@@ -154,7 +154,7 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
154 154
155 hfsplus_ext_build_key(fd->search_key, cnid, block, type); 155 hfsplus_ext_build_key(fd->search_key, cnid, block, type);
156 fd->key->ext.cnid = 0; 156 fd->key->ext.cnid = 0;
157 res = hfs_brec_find(fd); 157 res = hfs_brec_find(fd, hfs_find_rec_by_key);
158 if (res && res != -ENOENT) 158 if (res && res != -ENOENT)
159 return res; 159 return res;
160 if (fd->key->ext.cnid != fd->search_key->ext.cnid || 160 if (fd->key->ext.cnid != fd->search_key->ext.cnid ||
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a6da86b1b4c1..05b11f36024c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -23,6 +23,7 @@
23#define DBG_SUPER 0x00000010 23#define DBG_SUPER 0x00000010
24#define DBG_EXTENT 0x00000020 24#define DBG_EXTENT 0x00000020
25#define DBG_BITMAP 0x00000040 25#define DBG_BITMAP 0x00000040
26#define DBG_ATTR_MOD 0x00000080
26 27
27#if 0 28#if 0
28#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) 29#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
@@ -46,6 +47,13 @@ typedef int (*btree_keycmp)(const hfsplus_btree_key *,
46 47
47#define NODE_HASH_SIZE 256 48#define NODE_HASH_SIZE 256
48 49
50/* B-tree mutex nested subclasses */
51enum hfsplus_btree_mutex_classes {
52 CATALOG_BTREE_MUTEX,
53 EXTENTS_BTREE_MUTEX,
54 ATTR_BTREE_MUTEX,
55};
56
49/* An HFS+ BTree held in memory */ 57/* An HFS+ BTree held in memory */
50struct hfs_btree { 58struct hfs_btree {
51 struct super_block *sb; 59 struct super_block *sb;
@@ -223,6 +231,7 @@ struct hfsplus_inode_info {
223#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */ 231#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */
224#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */ 232#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */
225#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */ 233#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */
234#define HFSPLUS_I_ATTR_DIRTY 4 /* has changes in the attributes tree */
226 235
227#define HFSPLUS_IS_RSRC(inode) \ 236#define HFSPLUS_IS_RSRC(inode) \
228 test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags) 237 test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags)
@@ -302,7 +311,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
302#define hfs_brec_remove hfsplus_brec_remove 311#define hfs_brec_remove hfsplus_brec_remove
303#define hfs_find_init hfsplus_find_init 312#define hfs_find_init hfsplus_find_init
304#define hfs_find_exit hfsplus_find_exit 313#define hfs_find_exit hfsplus_find_exit
305#define __hfs_brec_find __hplusfs_brec_find 314#define __hfs_brec_find __hfsplus_brec_find
306#define hfs_brec_find hfsplus_brec_find 315#define hfs_brec_find hfsplus_brec_find
307#define hfs_brec_read hfsplus_brec_read 316#define hfs_brec_read hfsplus_brec_read
308#define hfs_brec_goto hfsplus_brec_goto 317#define hfs_brec_goto hfsplus_brec_goto
@@ -324,10 +333,33 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
324 */ 333 */
325#define HFSPLUS_IOC_BLESS _IO('h', 0x80) 334#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
326 335
336typedef int (*search_strategy_t)(struct hfs_bnode *,
337 struct hfs_find_data *,
338 int *, int *, int *);
339
327/* 340/*
328 * Functions in any *.c used in other files 341 * Functions in any *.c used in other files
329 */ 342 */
330 343
344/* attributes.c */
345int hfsplus_create_attr_tree_cache(void);
346void hfsplus_destroy_attr_tree_cache(void);
347hfsplus_attr_entry *hfsplus_alloc_attr_entry(void);
348void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p);
349int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *,
350 const hfsplus_btree_key *);
351int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *,
352 u32, const char *);
353void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
354 u32 cnid,
355 struct hfsplus_attr_unistr *name);
356int hfsplus_find_attr(struct super_block *, u32,
357 const char *, struct hfs_find_data *);
358int hfsplus_attr_exists(struct inode *inode, const char *name);
359int hfsplus_create_attr(struct inode *, const char *, const void *, size_t);
360int hfsplus_delete_attr(struct inode *, const char *);
361int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid);
362
331/* bitmap.c */ 363/* bitmap.c */
332int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); 364int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *);
333int hfsplus_block_free(struct super_block *, u32, u32); 365int hfsplus_block_free(struct super_block *, u32, u32);
@@ -369,8 +401,15 @@ int hfs_brec_remove(struct hfs_find_data *);
369/* bfind.c */ 401/* bfind.c */
370int hfs_find_init(struct hfs_btree *, struct hfs_find_data *); 402int hfs_find_init(struct hfs_btree *, struct hfs_find_data *);
371void hfs_find_exit(struct hfs_find_data *); 403void hfs_find_exit(struct hfs_find_data *);
372int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *); 404int hfs_find_1st_rec_by_cnid(struct hfs_bnode *,
373int hfs_brec_find(struct hfs_find_data *); 405 struct hfs_find_data *,
406 int *, int *, int *);
407int hfs_find_rec_by_key(struct hfs_bnode *,
408 struct hfs_find_data *,
409 int *, int *, int *);
410int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *,
411 search_strategy_t);
412int hfs_brec_find(struct hfs_find_data *, search_strategy_t);
374int hfs_brec_read(struct hfs_find_data *, void *, int); 413int hfs_brec_read(struct hfs_find_data *, void *, int);
375int hfs_brec_goto(struct hfs_find_data *, int); 414int hfs_brec_goto(struct hfs_find_data *, int);
376 415
@@ -417,11 +456,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
417 456
418/* ioctl.c */ 457/* ioctl.c */
419long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 458long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
420int hfsplus_setxattr(struct dentry *dentry, const char *name,
421 const void *value, size_t size, int flags);
422ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
423 void *value, size_t size);
424ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
425 459
426/* options.c */ 460/* options.c */
427int hfsplus_parse_options(char *, struct hfsplus_sb_info *); 461int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
@@ -446,7 +480,7 @@ int hfsplus_strcmp(const struct hfsplus_unistr *,
446int hfsplus_uni2asc(struct super_block *, 480int hfsplus_uni2asc(struct super_block *,
447 const struct hfsplus_unistr *, char *, int *); 481 const struct hfsplus_unistr *, char *, int *);
448int hfsplus_asc2uni(struct super_block *, 482int hfsplus_asc2uni(struct super_block *,
449 struct hfsplus_unistr *, const char *, int); 483 struct hfsplus_unistr *, int, const char *, int);
450int hfsplus_hash_dentry(const struct dentry *dentry, 484int hfsplus_hash_dentry(const struct dentry *dentry,
451 const struct inode *inode, struct qstr *str); 485 const struct inode *inode, struct qstr *str);
452int hfsplus_compare_dentry(const struct dentry *parent, 486int hfsplus_compare_dentry(const struct dentry *parent,
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 921967e5abb1..452ede01b036 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -52,13 +52,23 @@
52typedef __be32 hfsplus_cnid; 52typedef __be32 hfsplus_cnid;
53typedef __be16 hfsplus_unichr; 53typedef __be16 hfsplus_unichr;
54 54
55#define HFSPLUS_MAX_STRLEN 255
56#define HFSPLUS_ATTR_MAX_STRLEN 127
57
55/* A "string" as used in filenames, etc. */ 58/* A "string" as used in filenames, etc. */
56struct hfsplus_unistr { 59struct hfsplus_unistr {
57 __be16 length; 60 __be16 length;
58 hfsplus_unichr unicode[255]; 61 hfsplus_unichr unicode[HFSPLUS_MAX_STRLEN];
59} __packed; 62} __packed;
60 63
61#define HFSPLUS_MAX_STRLEN 255 64/*
65 * A "string" is used in attributes file
66 * for name of extended attribute
67 */
68struct hfsplus_attr_unistr {
69 __be16 length;
70 hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN];
71} __packed;
62 72
63/* POSIX permissions */ 73/* POSIX permissions */
64struct hfsplus_perm { 74struct hfsplus_perm {
@@ -291,6 +301,8 @@ struct hfsplus_cat_file {
291/* File attribute bits */ 301/* File attribute bits */
292#define HFSPLUS_FILE_LOCKED 0x0001 302#define HFSPLUS_FILE_LOCKED 0x0001
293#define HFSPLUS_FILE_THREAD_EXISTS 0x0002 303#define HFSPLUS_FILE_THREAD_EXISTS 0x0002
304#define HFSPLUS_XATTR_EXISTS 0x0004
305#define HFSPLUS_ACL_EXISTS 0x0008
294 306
295/* HFS+ catalog thread (part of a cat_entry) */ 307/* HFS+ catalog thread (part of a cat_entry) */
296struct hfsplus_cat_thread { 308struct hfsplus_cat_thread {
@@ -327,11 +339,63 @@ struct hfsplus_ext_key {
327 339
328#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) 340#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key)
329 341
342#define HFSPLUS_XATTR_FINDER_INFO_NAME "com.apple.FinderInfo"
343#define HFSPLUS_XATTR_ACL_NAME "com.apple.system.Security"
344
345#define HFSPLUS_ATTR_INLINE_DATA 0x10
346#define HFSPLUS_ATTR_FORK_DATA 0x20
347#define HFSPLUS_ATTR_EXTENTS 0x30
348
349/* HFS+ attributes tree key */
350struct hfsplus_attr_key {
351 __be16 key_len;
352 __be16 pad;
353 hfsplus_cnid cnid;
354 __be32 start_block;
355 struct hfsplus_attr_unistr key_name;
356} __packed;
357
358#define HFSPLUS_ATTR_KEYLEN sizeof(struct hfsplus_attr_key)
359
360/* HFS+ fork data attribute */
361struct hfsplus_attr_fork_data {
362 __be32 record_type;
363 __be32 reserved;
364 struct hfsplus_fork_raw the_fork;
365} __packed;
366
367/* HFS+ extension attribute */
368struct hfsplus_attr_extents {
369 __be32 record_type;
370 __be32 reserved;
371 struct hfsplus_extent extents;
372} __packed;
373
374#define HFSPLUS_MAX_INLINE_DATA_SIZE 3802
375
376/* HFS+ attribute inline data */
377struct hfsplus_attr_inline_data {
378 __be32 record_type;
379 __be32 reserved1;
380 u8 reserved2[6];
381 __be16 length;
382 u8 raw_bytes[HFSPLUS_MAX_INLINE_DATA_SIZE];
383} __packed;
384
385/* A data record in the attributes tree */
386typedef union {
387 __be32 record_type;
388 struct hfsplus_attr_fork_data fork_data;
389 struct hfsplus_attr_extents extents;
390 struct hfsplus_attr_inline_data inline_data;
391} __packed hfsplus_attr_entry;
392
330/* HFS+ generic BTree key */ 393/* HFS+ generic BTree key */
331typedef union { 394typedef union {
332 __be16 key_len; 395 __be16 key_len;
333 struct hfsplus_cat_key cat; 396 struct hfsplus_cat_key cat;
334 struct hfsplus_ext_key ext; 397 struct hfsplus_ext_key ext;
398 struct hfsplus_attr_key attr;
335} __packed hfsplus_btree_key; 399} __packed hfsplus_btree_key;
336 400
337#endif 401#endif
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 799b336b59f9..160ccc9cdb4b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -17,6 +17,7 @@
17 17
18#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 19#include "hfsplus_raw.h"
20#include "xattr.h"
20 21
21static int hfsplus_readpage(struct file *file, struct page *page) 22static int hfsplus_readpage(struct file *file, struct page *page)
22{ 23{
@@ -124,7 +125,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
124{ 125{
125 struct file *file = iocb->ki_filp; 126 struct file *file = iocb->ki_filp;
126 struct address_space *mapping = file->f_mapping; 127 struct address_space *mapping = file->f_mapping;
127 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 128 struct inode *inode = file_inode(file)->i_mapping->host;
128 ssize_t ret; 129 ssize_t ret;
129 130
130 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 131 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
@@ -348,6 +349,18 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
348 error = error2; 349 error = error2;
349 } 350 }
350 351
352 if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) {
353 if (sbi->attr_tree) {
354 error2 =
355 filemap_write_and_wait(
356 sbi->attr_tree->inode->i_mapping);
357 if (!error)
358 error = error2;
359 } else {
360 printk(KERN_ERR "hfs: sync non-existent attributes tree\n");
361 }
362 }
363
351 if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { 364 if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
352 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); 365 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
353 if (!error) 366 if (!error)
@@ -365,9 +378,10 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
365static const struct inode_operations hfsplus_file_inode_operations = { 378static const struct inode_operations hfsplus_file_inode_operations = {
366 .lookup = hfsplus_file_lookup, 379 .lookup = hfsplus_file_lookup,
367 .setattr = hfsplus_setattr, 380 .setattr = hfsplus_setattr,
368 .setxattr = hfsplus_setxattr, 381 .setxattr = generic_setxattr,
369 .getxattr = hfsplus_getxattr, 382 .getxattr = generic_getxattr,
370 .listxattr = hfsplus_listxattr, 383 .listxattr = hfsplus_listxattr,
384 .removexattr = hfsplus_removexattr,
371}; 385};
372 386
373static const struct file_operations hfsplus_file_operations = { 387static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 09addc8615fa..d3ff5cc317d7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -16,7 +16,6 @@
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/xattr.h>
20#include <asm/uaccess.h> 19#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 20#include "hfsplus_fs.h"
22 21
@@ -59,7 +58,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
59 58
60static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) 59static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
61{ 60{
62 struct inode *inode = file->f_path.dentry->d_inode; 61 struct inode *inode = file_inode(file);
63 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 62 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
64 unsigned int flags = 0; 63 unsigned int flags = 0;
65 64
@@ -75,7 +74,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
75 74
76static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) 75static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
77{ 76{
78 struct inode *inode = file->f_path.dentry->d_inode; 77 struct inode *inode = file_inode(file);
79 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 78 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
80 unsigned int flags; 79 unsigned int flags;
81 int err = 0; 80 int err = 0;
@@ -151,110 +150,3 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
151 return -ENOTTY; 150 return -ENOTTY;
152 } 151 }
153} 152}
154
155int hfsplus_setxattr(struct dentry *dentry, const char *name,
156 const void *value, size_t size, int flags)
157{
158 struct inode *inode = dentry->d_inode;
159 struct hfs_find_data fd;
160 hfsplus_cat_entry entry;
161 struct hfsplus_cat_file *file;
162 int res;
163
164 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
165 return -EOPNOTSUPP;
166
167 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
168 if (res)
169 return res;
170 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
171 if (res)
172 goto out;
173 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
174 sizeof(struct hfsplus_cat_file));
175 file = &entry.file;
176
177 if (!strcmp(name, "hfs.type")) {
178 if (size == 4)
179 memcpy(&file->user_info.fdType, value, 4);
180 else
181 res = -ERANGE;
182 } else if (!strcmp(name, "hfs.creator")) {
183 if (size == 4)
184 memcpy(&file->user_info.fdCreator, value, 4);
185 else
186 res = -ERANGE;
187 } else
188 res = -EOPNOTSUPP;
189 if (!res) {
190 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
191 sizeof(struct hfsplus_cat_file));
192 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
193 }
194out:
195 hfs_find_exit(&fd);
196 return res;
197}
198
199ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
200 void *value, size_t size)
201{
202 struct inode *inode = dentry->d_inode;
203 struct hfs_find_data fd;
204 hfsplus_cat_entry entry;
205 struct hfsplus_cat_file *file;
206 ssize_t res = 0;
207
208 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
209 return -EOPNOTSUPP;
210
211 if (size) {
212 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
213 if (res)
214 return res;
215 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
216 if (res)
217 goto out;
218 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
219 sizeof(struct hfsplus_cat_file));
220 }
221 file = &entry.file;
222
223 if (!strcmp(name, "hfs.type")) {
224 if (size >= 4) {
225 memcpy(value, &file->user_info.fdType, 4);
226 res = 4;
227 } else
228 res = size ? -ERANGE : 4;
229 } else if (!strcmp(name, "hfs.creator")) {
230 if (size >= 4) {
231 memcpy(value, &file->user_info.fdCreator, 4);
232 res = 4;
233 } else
234 res = size ? -ERANGE : 4;
235 } else
236 res = -EOPNOTSUPP;
237out:
238 if (size)
239 hfs_find_exit(&fd);
240 return res;
241}
242
243#define HFSPLUS_ATTRLIST_SIZE (sizeof("hfs.creator")+sizeof("hfs.type"))
244
245ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
246{
247 struct inode *inode = dentry->d_inode;
248
249 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
250 return -EOPNOTSUPP;
251
252 if (!buffer || !size)
253 return HFSPLUS_ATTRLIST_SIZE;
254 if (size < HFSPLUS_ATTRLIST_SIZE)
255 return -ERANGE;
256 strcpy(buffer, "hfs.type");
257 strcpy(buffer + sizeof("hfs.type"), "hfs.creator");
258
259 return HFSPLUS_ATTRLIST_SIZE;
260}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 796198d26553..974c26f96fae 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -20,6 +20,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb);
20static void hfsplus_destroy_inode(struct inode *inode); 20static void hfsplus_destroy_inode(struct inode *inode);
21 21
22#include "hfsplus_fs.h" 22#include "hfsplus_fs.h"
23#include "xattr.h"
23 24
24static int hfsplus_system_read_inode(struct inode *inode) 25static int hfsplus_system_read_inode(struct inode *inode)
25{ 26{
@@ -118,6 +119,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
118 case HFSPLUS_ATTR_CNID: 119 case HFSPLUS_ATTR_CNID:
119 fork = &vhdr->attr_file; 120 fork = &vhdr->attr_file;
120 tree = sbi->attr_tree; 121 tree = sbi->attr_tree;
122 break;
121 default: 123 default:
122 return -EIO; 124 return -EIO;
123 } 125 }
@@ -191,6 +193,12 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
191 error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); 193 error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
192 if (!error) 194 if (!error)
193 error = error2; 195 error = error2;
196 if (sbi->attr_tree) {
197 error2 =
198 filemap_write_and_wait(sbi->attr_tree->inode->i_mapping);
199 if (!error)
200 error = error2;
201 }
194 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); 202 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
195 if (!error) 203 if (!error)
196 error = error2; 204 error = error2;
@@ -281,6 +289,7 @@ static void hfsplus_put_super(struct super_block *sb)
281 hfsplus_sync_fs(sb, 1); 289 hfsplus_sync_fs(sb, 1);
282 } 290 }
283 291
292 hfs_btree_close(sbi->attr_tree);
284 hfs_btree_close(sbi->cat_tree); 293 hfs_btree_close(sbi->cat_tree);
285 hfs_btree_close(sbi->ext_tree); 294 hfs_btree_close(sbi->ext_tree);
286 iput(sbi->alloc_file); 295 iput(sbi->alloc_file);
@@ -477,12 +486,20 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
477 printk(KERN_ERR "hfs: failed to load catalog file\n"); 486 printk(KERN_ERR "hfs: failed to load catalog file\n");
478 goto out_close_ext_tree; 487 goto out_close_ext_tree;
479 } 488 }
489 if (vhdr->attr_file.total_blocks != 0) {
490 sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
491 if (!sbi->attr_tree) {
492 printk(KERN_ERR "hfs: failed to load attributes file\n");
493 goto out_close_cat_tree;
494 }
495 }
496 sb->s_xattr = hfsplus_xattr_handlers;
480 497
481 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); 498 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
482 if (IS_ERR(inode)) { 499 if (IS_ERR(inode)) {
483 printk(KERN_ERR "hfs: failed to load allocation file\n"); 500 printk(KERN_ERR "hfs: failed to load allocation file\n");
484 err = PTR_ERR(inode); 501 err = PTR_ERR(inode);
485 goto out_close_cat_tree; 502 goto out_close_attr_tree;
486 } 503 }
487 sbi->alloc_file = inode; 504 sbi->alloc_file = inode;
488 505
@@ -542,10 +559,27 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
542 } 559 }
543 err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, 560 err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root,
544 &str, sbi->hidden_dir); 561 &str, sbi->hidden_dir);
545 mutex_unlock(&sbi->vh_mutex); 562 if (err) {
546 if (err) 563 mutex_unlock(&sbi->vh_mutex);
564 goto out_put_hidden_dir;
565 }
566
567 err = hfsplus_init_inode_security(sbi->hidden_dir,
568 root, &str);
569 if (err == -EOPNOTSUPP)
570 err = 0; /* Operation is not supported. */
571 else if (err) {
572 /*
573 * Try to delete the entry anyway,
574 * ignoring any further error.
575 */
576 hfsplus_delete_cat(sbi->hidden_dir->i_ino,
577 root, &str);
578 mutex_unlock(&sbi->vh_mutex);
547 goto out_put_hidden_dir; 579 goto out_put_hidden_dir;
580 }
548 581
582 mutex_unlock(&sbi->vh_mutex);
549 hfsplus_mark_inode_dirty(sbi->hidden_dir, 583 hfsplus_mark_inode_dirty(sbi->hidden_dir,
550 HFSPLUS_I_CAT_DIRTY); 584 HFSPLUS_I_CAT_DIRTY);
551 } 585 }
@@ -562,6 +596,8 @@ out_put_root:
562 sb->s_root = NULL; 596 sb->s_root = NULL;
563out_put_alloc_file: 597out_put_alloc_file:
564 iput(sbi->alloc_file); 598 iput(sbi->alloc_file);
599out_close_attr_tree:
600 hfs_btree_close(sbi->attr_tree);
565out_close_cat_tree: 601out_close_cat_tree:
566 hfs_btree_close(sbi->cat_tree); 602 hfs_btree_close(sbi->cat_tree);
567out_close_ext_tree: 603out_close_ext_tree:
@@ -635,9 +671,20 @@ static int __init init_hfsplus_fs(void)
635 hfsplus_init_once); 671 hfsplus_init_once);
636 if (!hfsplus_inode_cachep) 672 if (!hfsplus_inode_cachep)
637 return -ENOMEM; 673 return -ENOMEM;
674 err = hfsplus_create_attr_tree_cache();
675 if (err)
676 goto destroy_inode_cache;
638 err = register_filesystem(&hfsplus_fs_type); 677 err = register_filesystem(&hfsplus_fs_type);
639 if (err) 678 if (err)
640 kmem_cache_destroy(hfsplus_inode_cachep); 679 goto destroy_attr_tree_cache;
680 return 0;
681
682destroy_attr_tree_cache:
683 hfsplus_destroy_attr_tree_cache();
684
685destroy_inode_cache:
686 kmem_cache_destroy(hfsplus_inode_cachep);
687
641 return err; 688 return err;
642} 689}
643 690
@@ -650,6 +697,7 @@ static void __exit exit_hfsplus_fs(void)
650 * destroy cache. 697 * destroy cache.
651 */ 698 */
652 rcu_barrier(); 699 rcu_barrier();
700 hfsplus_destroy_attr_tree_cache();
653 kmem_cache_destroy(hfsplus_inode_cachep); 701 kmem_cache_destroy(hfsplus_inode_cachep);
654} 702}
655 703
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a32998f29f0b..2c2e47dcfdd8 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -295,7 +295,8 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
295 return hfsplus_decompose_table + (off / 4); 295 return hfsplus_decompose_table + (off / 4);
296} 296}
297 297
298int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, 298int hfsplus_asc2uni(struct super_block *sb,
299 struct hfsplus_unistr *ustr, int max_unistr_len,
299 const char *astr, int len) 300 const char *astr, int len)
300{ 301{
301 int size, dsize, decompose; 302 int size, dsize, decompose;
@@ -303,7 +304,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
303 wchar_t c; 304 wchar_t c;
304 305
305 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 306 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
306 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 307 while (outlen < max_unistr_len && len > 0) {
307 size = asc2unichar(sb, astr, len, &c); 308 size = asc2unichar(sb, astr, len, &c);
308 309
309 if (decompose) 310 if (decompose)
@@ -311,7 +312,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
311 else 312 else
312 dstr = NULL; 313 dstr = NULL;
313 if (dstr) { 314 if (dstr) {
314 if (outlen + dsize > HFSPLUS_MAX_STRLEN) 315 if (outlen + dsize > max_unistr_len)
315 break; 316 break;
316 do { 317 do {
317 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 318 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
new file mode 100644
index 000000000000..e8a4b0815c61
--- /dev/null
+++ b/fs/hfsplus/xattr.c
@@ -0,0 +1,709 @@
1/*
2 * linux/fs/hfsplus/xattr.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Logic of processing extended attributes
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12const struct xattr_handler *hfsplus_xattr_handlers[] = {
13 &hfsplus_xattr_osx_handler,
14 &hfsplus_xattr_user_handler,
15 &hfsplus_xattr_trusted_handler,
16 &hfsplus_xattr_security_handler,
17 NULL
18};
19
20static int strcmp_xattr_finder_info(const char *name)
21{
22 if (name) {
23 return strncmp(name, HFSPLUS_XATTR_FINDER_INFO_NAME,
24 sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME));
25 }
26 return -1;
27}
28
29static int strcmp_xattr_acl(const char *name)
30{
31 if (name) {
32 return strncmp(name, HFSPLUS_XATTR_ACL_NAME,
33 sizeof(HFSPLUS_XATTR_ACL_NAME));
34 }
35 return -1;
36}
37
38static inline int is_known_namespace(const char *name)
39{
40 if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
41 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
42 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
43 strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
44 return false;
45
46 return true;
47}
48
49static int can_set_xattr(struct inode *inode, const char *name,
50 const void *value, size_t value_len)
51{
52 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
53 return -EOPNOTSUPP; /* TODO: implement ACL support */
54
55 if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) {
56 /*
57 * This makes sure that we aren't trying to set an
58 * attribute in a different namespace by prefixing it
59 * with "osx."
60 */
61 if (is_known_namespace(name + XATTR_MAC_OSX_PREFIX_LEN))
62 return -EOPNOTSUPP;
63
64 return 0;
65 }
66
67 /*
68 * Don't allow setting an attribute in an unknown namespace.
69 */
70 if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
71 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
72 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
73 return -EOPNOTSUPP;
74
75 return 0;
76}
77
78int __hfsplus_setxattr(struct inode *inode, const char *name,
79 const void *value, size_t size, int flags)
80{
81 int err = 0;
82 struct hfs_find_data cat_fd;
83 hfsplus_cat_entry entry;
84 u16 cat_entry_flags, cat_entry_type;
85 u16 folder_finderinfo_len = sizeof(struct DInfo) +
86 sizeof(struct DXInfo);
87 u16 file_finderinfo_len = sizeof(struct FInfo) +
88 sizeof(struct FXInfo);
89
90 if ((!S_ISREG(inode->i_mode) &&
91 !S_ISDIR(inode->i_mode)) ||
92 HFSPLUS_IS_RSRC(inode))
93 return -EOPNOTSUPP;
94
95 err = can_set_xattr(inode, name, value, size);
96 if (err)
97 return err;
98
99 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
100 XATTR_MAC_OSX_PREFIX_LEN) == 0)
101 name += XATTR_MAC_OSX_PREFIX_LEN;
102
103 if (value == NULL) {
104 value = "";
105 size = 0;
106 }
107
108 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
109 if (err) {
110 printk(KERN_ERR "hfs: can't init xattr find struct\n");
111 return err;
112 }
113
114 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
115 if (err) {
116 printk(KERN_ERR "hfs: catalog searching failed\n");
117 goto end_setxattr;
118 }
119
120 if (!strcmp_xattr_finder_info(name)) {
121 if (flags & XATTR_CREATE) {
122 printk(KERN_ERR "hfs: xattr already exists\n");
123 err = -EOPNOTSUPP;
124 goto end_setxattr;
125 }
126 hfs_bnode_read(cat_fd.bnode, &entry, cat_fd.entryoffset,
127 sizeof(hfsplus_cat_entry));
128 if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
129 if (size == folder_finderinfo_len) {
130 memcpy(&entry.folder.user_info, value,
131 folder_finderinfo_len);
132 hfs_bnode_write(cat_fd.bnode, &entry,
133 cat_fd.entryoffset,
134 sizeof(struct hfsplus_cat_folder));
135 hfsplus_mark_inode_dirty(inode,
136 HFSPLUS_I_CAT_DIRTY);
137 } else {
138 err = -ERANGE;
139 goto end_setxattr;
140 }
141 } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
142 if (size == file_finderinfo_len) {
143 memcpy(&entry.file.user_info, value,
144 file_finderinfo_len);
145 hfs_bnode_write(cat_fd.bnode, &entry,
146 cat_fd.entryoffset,
147 sizeof(struct hfsplus_cat_file));
148 hfsplus_mark_inode_dirty(inode,
149 HFSPLUS_I_CAT_DIRTY);
150 } else {
151 err = -ERANGE;
152 goto end_setxattr;
153 }
154 } else {
155 err = -EOPNOTSUPP;
156 goto end_setxattr;
157 }
158 goto end_setxattr;
159 }
160
161 if (!HFSPLUS_SB(inode->i_sb)->attr_tree) {
162 err = -EOPNOTSUPP;
163 goto end_setxattr;
164 }
165
166 if (hfsplus_attr_exists(inode, name)) {
167 if (flags & XATTR_CREATE) {
168 printk(KERN_ERR "hfs: xattr already exists\n");
169 err = -EOPNOTSUPP;
170 goto end_setxattr;
171 }
172 err = hfsplus_delete_attr(inode, name);
173 if (err)
174 goto end_setxattr;
175 err = hfsplus_create_attr(inode, name, value, size);
176 if (err)
177 goto end_setxattr;
178 } else {
179 if (flags & XATTR_REPLACE) {
180 printk(KERN_ERR "hfs: cannot replace xattr\n");
181 err = -EOPNOTSUPP;
182 goto end_setxattr;
183 }
184 err = hfsplus_create_attr(inode, name, value, size);
185 if (err)
186 goto end_setxattr;
187 }
188
189 cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
190 if (cat_entry_type == HFSPLUS_FOLDER) {
191 cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
192 cat_fd.entryoffset +
193 offsetof(struct hfsplus_cat_folder, flags));
194 cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
195 if (!strcmp_xattr_acl(name))
196 cat_entry_flags |= HFSPLUS_ACL_EXISTS;
197 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
198 offsetof(struct hfsplus_cat_folder, flags),
199 cat_entry_flags);
200 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
201 } else if (cat_entry_type == HFSPLUS_FILE) {
202 cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
203 cat_fd.entryoffset +
204 offsetof(struct hfsplus_cat_file, flags));
205 cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
206 if (!strcmp_xattr_acl(name))
207 cat_entry_flags |= HFSPLUS_ACL_EXISTS;
208 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
209 offsetof(struct hfsplus_cat_file, flags),
210 cat_entry_flags);
211 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
212 } else {
213 printk(KERN_ERR "hfs: invalid catalog entry type\n");
214 err = -EIO;
215 goto end_setxattr;
216 }
217
218end_setxattr:
219 hfs_find_exit(&cat_fd);
220 return err;
221}
222
223static inline int is_osx_xattr(const char *xattr_name)
224{
225 return !is_known_namespace(xattr_name);
226}
227
228static int name_len(const char *xattr_name, int xattr_name_len)
229{
230 int len = xattr_name_len + 1;
231
232 if (is_osx_xattr(xattr_name))
233 len += XATTR_MAC_OSX_PREFIX_LEN;
234
235 return len;
236}
237
238static int copy_name(char *buffer, const char *xattr_name, int name_len)
239{
240 int len = name_len;
241 int offset = 0;
242
243 if (is_osx_xattr(xattr_name)) {
244 strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN);
245 offset += XATTR_MAC_OSX_PREFIX_LEN;
246 len += XATTR_MAC_OSX_PREFIX_LEN;
247 }
248
249 strncpy(buffer + offset, xattr_name, name_len);
250 memset(buffer + offset + name_len, 0, 1);
251 len += 1;
252
253 return len;
254}
255
256static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry,
257 void *value, size_t size)
258{
259 ssize_t res = 0;
260 struct inode *inode = dentry->d_inode;
261 struct hfs_find_data fd;
262 u16 entry_type;
263 u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo);
264 u16 file_rec_len = sizeof(struct FInfo) + sizeof(struct FXInfo);
265 u16 record_len = max(folder_rec_len, file_rec_len);
266 u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
267 u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
268
269 if (size >= record_len) {
270 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
271 if (res) {
272 printk(KERN_ERR "hfs: can't init xattr find struct\n");
273 return res;
274 }
275 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
276 if (res)
277 goto end_getxattr_finder_info;
278 entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
279
280 if (entry_type == HFSPLUS_FOLDER) {
281 hfs_bnode_read(fd.bnode, folder_finder_info,
282 fd.entryoffset +
283 offsetof(struct hfsplus_cat_folder, user_info),
284 folder_rec_len);
285 memcpy(value, folder_finder_info, folder_rec_len);
286 res = folder_rec_len;
287 } else if (entry_type == HFSPLUS_FILE) {
288 hfs_bnode_read(fd.bnode, file_finder_info,
289 fd.entryoffset +
290 offsetof(struct hfsplus_cat_file, user_info),
291 file_rec_len);
292 memcpy(value, file_finder_info, file_rec_len);
293 res = file_rec_len;
294 } else {
295 res = -EOPNOTSUPP;
296 goto end_getxattr_finder_info;
297 }
298 } else
299 res = size ? -ERANGE : record_len;
300
301end_getxattr_finder_info:
302 if (size >= record_len)
303 hfs_find_exit(&fd);
304 return res;
305}
306
307ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
308 void *value, size_t size)
309{
310 struct inode *inode = dentry->d_inode;
311 struct hfs_find_data fd;
312 hfsplus_attr_entry *entry;
313 __be32 xattr_record_type;
314 u32 record_type;
315 u16 record_length = 0;
316 ssize_t res = 0;
317
318 if ((!S_ISREG(inode->i_mode) &&
319 !S_ISDIR(inode->i_mode)) ||
320 HFSPLUS_IS_RSRC(inode))
321 return -EOPNOTSUPP;
322
323 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
324 XATTR_MAC_OSX_PREFIX_LEN) == 0) {
325 /* skip "osx." prefix */
326 name += XATTR_MAC_OSX_PREFIX_LEN;
327 /*
328 * Don't allow retrieving properly prefixed attributes
329 * by prepending them with "osx."
330 */
331 if (is_known_namespace(name))
332 return -EOPNOTSUPP;
333 }
334
335 if (!strcmp_xattr_finder_info(name))
336 return hfsplus_getxattr_finder_info(dentry, value, size);
337
338 if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
339 return -EOPNOTSUPP;
340
341 entry = hfsplus_alloc_attr_entry();
342 if (!entry) {
343 printk(KERN_ERR "hfs: can't allocate xattr entry\n");
344 return -ENOMEM;
345 }
346
347 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
348 if (res) {
349 printk(KERN_ERR "hfs: can't init xattr find struct\n");
350 goto failed_getxattr_init;
351 }
352
353 res = hfsplus_find_attr(inode->i_sb, inode->i_ino, name, &fd);
354 if (res) {
355 if (res == -ENOENT)
356 res = -ENODATA;
357 else
358 printk(KERN_ERR "hfs: xattr searching failed\n");
359 goto out;
360 }
361
362 hfs_bnode_read(fd.bnode, &xattr_record_type,
363 fd.entryoffset, sizeof(xattr_record_type));
364 record_type = be32_to_cpu(xattr_record_type);
365 if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
366 record_length = hfs_bnode_read_u16(fd.bnode,
367 fd.entryoffset +
368 offsetof(struct hfsplus_attr_inline_data,
369 length));
370 if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) {
371 printk(KERN_ERR "hfs: invalid xattr record size\n");
372 res = -EIO;
373 goto out;
374 }
375 } else if (record_type == HFSPLUS_ATTR_FORK_DATA ||
376 record_type == HFSPLUS_ATTR_EXTENTS) {
377 printk(KERN_ERR "hfs: only inline data xattr are supported\n");
378 res = -EOPNOTSUPP;
379 goto out;
380 } else {
381 printk(KERN_ERR "hfs: invalid xattr record\n");
382 res = -EIO;
383 goto out;
384 }
385
386 if (size) {
387 hfs_bnode_read(fd.bnode, entry, fd.entryoffset,
388 offsetof(struct hfsplus_attr_inline_data,
389 raw_bytes) + record_length);
390 }
391
392 if (size >= record_length) {
393 memcpy(value, entry->inline_data.raw_bytes, record_length);
394 res = record_length;
395 } else
396 res = size ? -ERANGE : record_length;
397
398out:
399 hfs_find_exit(&fd);
400
401failed_getxattr_init:
402 hfsplus_destroy_attr_entry(entry);
403 return res;
404}
405
406static inline int can_list(const char *xattr_name)
407{
408 if (!xattr_name)
409 return 0;
410
411 return strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
412 XATTR_TRUSTED_PREFIX_LEN) ||
413 capable(CAP_SYS_ADMIN);
414}
415
416static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry,
417 char *buffer, size_t size)
418{
419 ssize_t res = 0;
420 struct inode *inode = dentry->d_inode;
421 struct hfs_find_data fd;
422 u16 entry_type;
423 u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
424 u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
425 unsigned long len, found_bit;
426 int xattr_name_len, symbols_count;
427
428 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
429 if (res) {
430 printk(KERN_ERR "hfs: can't init xattr find struct\n");
431 return res;
432 }
433
434 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
435 if (res)
436 goto end_listxattr_finder_info;
437
438 entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
439 if (entry_type == HFSPLUS_FOLDER) {
440 len = sizeof(struct DInfo) + sizeof(struct DXInfo);
441 hfs_bnode_read(fd.bnode, folder_finder_info,
442 fd.entryoffset +
443 offsetof(struct hfsplus_cat_folder, user_info),
444 len);
445 found_bit = find_first_bit((void *)folder_finder_info, len*8);
446 } else if (entry_type == HFSPLUS_FILE) {
447 len = sizeof(struct FInfo) + sizeof(struct FXInfo);
448 hfs_bnode_read(fd.bnode, file_finder_info,
449 fd.entryoffset +
450 offsetof(struct hfsplus_cat_file, user_info),
451 len);
452 found_bit = find_first_bit((void *)file_finder_info, len*8);
453 } else {
454 res = -EOPNOTSUPP;
455 goto end_listxattr_finder_info;
456 }
457
458 if (found_bit >= (len*8))
459 res = 0;
460 else {
461 symbols_count = sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME) - 1;
462 xattr_name_len =
463 name_len(HFSPLUS_XATTR_FINDER_INFO_NAME, symbols_count);
464 if (!buffer || !size) {
465 if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME))
466 res = xattr_name_len;
467 } else if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) {
468 if (size < xattr_name_len)
469 res = -ERANGE;
470 else {
471 res = copy_name(buffer,
472 HFSPLUS_XATTR_FINDER_INFO_NAME,
473 symbols_count);
474 }
475 }
476 }
477
478end_listxattr_finder_info:
479 hfs_find_exit(&fd);
480
481 return res;
482}
483
484ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
485{
486 ssize_t err;
487 ssize_t res = 0;
488 struct inode *inode = dentry->d_inode;
489 struct hfs_find_data fd;
490 u16 key_len = 0;
491 struct hfsplus_attr_key attr_key;
492 char strbuf[HFSPLUS_ATTR_MAX_STRLEN +
493 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
494 int xattr_name_len;
495
496 if ((!S_ISREG(inode->i_mode) &&
497 !S_ISDIR(inode->i_mode)) ||
498 HFSPLUS_IS_RSRC(inode))
499 return -EOPNOTSUPP;
500
501 res = hfsplus_listxattr_finder_info(dentry, buffer, size);
502 if (res < 0)
503 return res;
504 else if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
505 return (res == 0) ? -EOPNOTSUPP : res;
506
507 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
508 if (err) {
509 printk(KERN_ERR "hfs: can't init xattr find struct\n");
510 return err;
511 }
512
513 err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd);
514 if (err) {
515 if (err == -ENOENT) {
516 if (res == 0)
517 res = -ENODATA;
518 goto end_listxattr;
519 } else {
520 res = err;
521 goto end_listxattr;
522 }
523 }
524
525 for (;;) {
526 key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset);
527 if (key_len == 0 || key_len > fd.tree->max_key_len) {
528 printk(KERN_ERR "hfs: invalid xattr key length: %d\n",
529 key_len);
530 res = -EIO;
531 goto end_listxattr;
532 }
533
534 hfs_bnode_read(fd.bnode, &attr_key,
535 fd.keyoffset, key_len + sizeof(key_len));
536
537 if (be32_to_cpu(attr_key.cnid) != inode->i_ino)
538 goto end_listxattr;
539
540 xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN;
541 if (hfsplus_uni2asc(inode->i_sb,
542 (const struct hfsplus_unistr *)&fd.key->attr.key_name,
543 strbuf, &xattr_name_len)) {
544 printk(KERN_ERR "hfs: unicode conversion failed\n");
545 res = -EIO;
546 goto end_listxattr;
547 }
548
549 if (!buffer || !size) {
550 if (can_list(strbuf))
551 res += name_len(strbuf, xattr_name_len);
552 } else if (can_list(strbuf)) {
553 if (size < (res + name_len(strbuf, xattr_name_len))) {
554 res = -ERANGE;
555 goto end_listxattr;
556 } else
557 res += copy_name(buffer + res,
558 strbuf, xattr_name_len);
559 }
560
561 if (hfs_brec_goto(&fd, 1))
562 goto end_listxattr;
563 }
564
565end_listxattr:
566 hfs_find_exit(&fd);
567 return res;
568}
569
570int hfsplus_removexattr(struct dentry *dentry, const char *name)
571{
572 int err = 0;
573 struct inode *inode = dentry->d_inode;
574 struct hfs_find_data cat_fd;
575 u16 flags;
576 u16 cat_entry_type;
577 int is_xattr_acl_deleted = 0;
578 int is_all_xattrs_deleted = 0;
579
580 if ((!S_ISREG(inode->i_mode) &&
581 !S_ISDIR(inode->i_mode)) ||
582 HFSPLUS_IS_RSRC(inode))
583 return -EOPNOTSUPP;
584
585 if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
586 return -EOPNOTSUPP;
587
588 err = can_set_xattr(inode, name, NULL, 0);
589 if (err)
590 return err;
591
592 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
593 XATTR_MAC_OSX_PREFIX_LEN) == 0)
594 name += XATTR_MAC_OSX_PREFIX_LEN;
595
596 if (!strcmp_xattr_finder_info(name))
597 return -EOPNOTSUPP;
598
599 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
600 if (err) {
601 printk(KERN_ERR "hfs: can't init xattr find struct\n");
602 return err;
603 }
604
605 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
606 if (err) {
607 printk(KERN_ERR "hfs: catalog searching failed\n");
608 goto end_removexattr;
609 }
610
611 err = hfsplus_delete_attr(inode, name);
612 if (err)
613 goto end_removexattr;
614
615 is_xattr_acl_deleted = !strcmp_xattr_acl(name);
616 is_all_xattrs_deleted = !hfsplus_attr_exists(inode, NULL);
617
618 if (!is_xattr_acl_deleted && !is_all_xattrs_deleted)
619 goto end_removexattr;
620
621 cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
622
623 if (cat_entry_type == HFSPLUS_FOLDER) {
624 flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
625 offsetof(struct hfsplus_cat_folder, flags));
626 if (is_xattr_acl_deleted)
627 flags &= ~HFSPLUS_ACL_EXISTS;
628 if (is_all_xattrs_deleted)
629 flags &= ~HFSPLUS_XATTR_EXISTS;
630 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
631 offsetof(struct hfsplus_cat_folder, flags),
632 flags);
633 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
634 } else if (cat_entry_type == HFSPLUS_FILE) {
635 flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
636 offsetof(struct hfsplus_cat_file, flags));
637 if (is_xattr_acl_deleted)
638 flags &= ~HFSPLUS_ACL_EXISTS;
639 if (is_all_xattrs_deleted)
640 flags &= ~HFSPLUS_XATTR_EXISTS;
641 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
642 offsetof(struct hfsplus_cat_file, flags),
643 flags);
644 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
645 } else {
646 printk(KERN_ERR "hfs: invalid catalog entry type\n");
647 err = -EIO;
648 goto end_removexattr;
649 }
650
651end_removexattr:
652 hfs_find_exit(&cat_fd);
653 return err;
654}
655
656static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
657 void *buffer, size_t size, int type)
658{
659 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
660 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
661 size_t len = strlen(name);
662
663 if (!strcmp(name, ""))
664 return -EINVAL;
665
666 if (len > HFSPLUS_ATTR_MAX_STRLEN)
667 return -EOPNOTSUPP;
668
669 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
670 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
671
672 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
673}
674
675static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
676 const void *buffer, size_t size, int flags, int type)
677{
678 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
679 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
680 size_t len = strlen(name);
681
682 if (!strcmp(name, ""))
683 return -EINVAL;
684
685 if (len > HFSPLUS_ATTR_MAX_STRLEN)
686 return -EOPNOTSUPP;
687
688 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
689 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
690
691 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
692}
693
694static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
695 size_t list_size, const char *name, size_t name_len, int type)
696{
697 /*
698 * This method is never called:
699 * hfsplus_listxattr() is used instead of generic_listxattr().
700 */
701 return -EOPNOTSUPP;
702}
703
704const struct xattr_handler hfsplus_xattr_osx_handler = {
705 .prefix = XATTR_MAC_OSX_PREFIX,
706 .list = hfsplus_osx_listxattr,
707 .get = hfsplus_osx_getxattr,
708 .set = hfsplus_osx_setxattr,
709};
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
new file mode 100644
index 000000000000..847b695b984d
--- /dev/null
+++ b/fs/hfsplus/xattr.h
@@ -0,0 +1,60 @@
1/*
2 * linux/fs/hfsplus/xattr.h
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Logic of processing extended attributes
7 */
8
9#ifndef _LINUX_HFSPLUS_XATTR_H
10#define _LINUX_HFSPLUS_XATTR_H
11
12#include <linux/xattr.h>
13
14extern const struct xattr_handler hfsplus_xattr_osx_handler;
15extern const struct xattr_handler hfsplus_xattr_user_handler;
16extern const struct xattr_handler hfsplus_xattr_trusted_handler;
17/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/
18/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/
19extern const struct xattr_handler hfsplus_xattr_security_handler;
20
21extern const struct xattr_handler *hfsplus_xattr_handlers[];
22
23int __hfsplus_setxattr(struct inode *inode, const char *name,
24 const void *value, size_t size, int flags);
25
26static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
27 const void *value, size_t size, int flags)
28{
29 return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
30}
31
32ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
33 void *value, size_t size);
34
35ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
36
37int hfsplus_removexattr(struct dentry *dentry, const char *name);
38
39int hfsplus_init_security(struct inode *inode, struct inode *dir,
40 const struct qstr *qstr);
41
42static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir)
43{
44 /*TODO: implement*/
45 return 0;
46}
47
48static inline int hfsplus_init_inode_security(struct inode *inode,
49 struct inode *dir,
50 const struct qstr *qstr)
51{
52 int err;
53
54 err = hfsplus_init_acl(inode, dir);
55 if (!err)
56 err = hfsplus_init_security(inode, dir, qstr);
57 return err;
58}
59
60#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
new file mode 100644
index 000000000000..83b842f113c5
--- /dev/null
+++ b/fs/hfsplus/xattr_security.c
@@ -0,0 +1,104 @@
1/*
2 * linux/fs/hfsplus/xattr_security.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for storing security labels as extended attributes.
7 */
8
9#include <linux/security.h>
10#include "hfsplus_fs.h"
11#include "xattr.h"
12
13static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
14 void *buffer, size_t size, int type)
15{
16 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
17 size_t len = strlen(name);
18
19 if (!strcmp(name, ""))
20 return -EINVAL;
21
22 if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
23 return -EOPNOTSUPP;
24
25 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
26 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
27
28 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
29}
30
31static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
32 const void *buffer, size_t size, int flags, int type)
33{
34 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
35 size_t len = strlen(name);
36
37 if (!strcmp(name, ""))
38 return -EINVAL;
39
40 if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
41 return -EOPNOTSUPP;
42
43 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
44 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
45
46 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
47}
48
49static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
50 size_t list_size, const char *name, size_t name_len, int type)
51{
52 /*
53 * This method is never called:
54 * hfsplus_listxattr() is used instead of generic_listxattr().
55 */
56 return -EOPNOTSUPP;
57}
58
59static int hfsplus_initxattrs(struct inode *inode,
60 const struct xattr *xattr_array,
61 void *fs_info)
62{
63 const struct xattr *xattr;
64 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
65 size_t xattr_name_len;
66 int err = 0;
67
68 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
69 xattr_name_len = strlen(xattr->name);
70
71 if (xattr_name_len == 0)
72 continue;
73
74 if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN >
75 HFSPLUS_ATTR_MAX_STRLEN)
76 return -EOPNOTSUPP;
77
78 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
79 strcpy(xattr_name +
80 XATTR_SECURITY_PREFIX_LEN, xattr->name);
81 memset(xattr_name +
82 XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1);
83
84 err = __hfsplus_setxattr(inode, xattr_name,
85 xattr->value, xattr->value_len, 0);
86 if (err)
87 break;
88 }
89 return err;
90}
91
92int hfsplus_init_security(struct inode *inode, struct inode *dir,
93 const struct qstr *qstr)
94{
95 return security_inode_init_security(inode, dir, qstr,
96 &hfsplus_initxattrs, NULL);
97}
98
99const struct xattr_handler hfsplus_xattr_security_handler = {
100 .prefix = XATTR_SECURITY_PREFIX,
101 .list = hfsplus_security_listxattr,
102 .get = hfsplus_security_getxattr,
103 .set = hfsplus_security_setxattr,
104};
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
new file mode 100644
index 000000000000..426cee277542
--- /dev/null
+++ b/fs/hfsplus/xattr_trusted.c
@@ -0,0 +1,63 @@
1/*
2 * linux/fs/hfsplus/xattr_trusted.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for trusted extended attributes.
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
13 void *buffer, size_t size, int type)
14{
15 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
16 size_t len = strlen(name);
17
18 if (!strcmp(name, ""))
19 return -EINVAL;
20
21 if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
22 return -EOPNOTSUPP;
23
24 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
25 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
26
27 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
28}
29
30static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
31 const void *buffer, size_t size, int flags, int type)
32{
33 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
34 size_t len = strlen(name);
35
36 if (!strcmp(name, ""))
37 return -EINVAL;
38
39 if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
40 return -EOPNOTSUPP;
41
42 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
43 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
44
45 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
46}
47
48static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
49 size_t list_size, const char *name, size_t name_len, int type)
50{
51 /*
52 * This method is never called:
53 * hfsplus_listxattr() is used instead of generic_listxattr().
54 */
55 return -EOPNOTSUPP;
56}
57
58const struct xattr_handler hfsplus_xattr_trusted_handler = {
59 .prefix = XATTR_TRUSTED_PREFIX,
60 .list = hfsplus_trusted_listxattr,
61 .get = hfsplus_trusted_getxattr,
62 .set = hfsplus_trusted_setxattr,
63};
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
new file mode 100644
index 000000000000..e34016561ae0
--- /dev/null
+++ b/fs/hfsplus/xattr_user.c
@@ -0,0 +1,63 @@
1/*
2 * linux/fs/hfsplus/xattr_user.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for user extended attributes.
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
13 void *buffer, size_t size, int type)
14{
15 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
16 size_t len = strlen(name);
17
18 if (!strcmp(name, ""))
19 return -EINVAL;
20
21 if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
22 return -EOPNOTSUPP;
23
24 strcpy(xattr_name, XATTR_USER_PREFIX);
25 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
26
27 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
28}
29
30static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
31 const void *buffer, size_t size, int flags, int type)
32{
33 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
34 size_t len = strlen(name);
35
36 if (!strcmp(name, ""))
37 return -EINVAL;
38
39 if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
40 return -EOPNOTSUPP;
41
42 strcpy(xattr_name, XATTR_USER_PREFIX);
43 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
44
45 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
46}
47
48static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
49 size_t list_size, const char *name, size_t name_len, int type)
50{
51 /*
52 * This method is never called:
53 * hfsplus_listxattr() is used instead of generic_listxattr().
54 */
55 return -EOPNOTSUPP;
56}
57
58const struct xattr_handler hfsplus_xattr_user_handler = {
59 .prefix = XATTR_USER_PREFIX,
60 .list = hfsplus_user_listxattr,
61 .get = hfsplus_user_getxattr,
62 .set = hfsplus_user_setxattr,
63};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 457addc5c91f..fbabb906066f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -30,7 +30,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
30 return list_entry(inode, struct hostfs_inode_info, vfs_inode); 30 return list_entry(inode, struct hostfs_inode_info, vfs_inode);
31} 31}
32 32
33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 33#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
34 34
35static int hostfs_d_delete(const struct dentry *dentry) 35static int hostfs_d_delete(const struct dentry *dentry)
36{ 36{
@@ -861,14 +861,6 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
861} 861}
862 862
863static const struct inode_operations hostfs_iops = { 863static const struct inode_operations hostfs_iops = {
864 .create = hostfs_create,
865 .link = hostfs_link,
866 .unlink = hostfs_unlink,
867 .symlink = hostfs_symlink,
868 .mkdir = hostfs_mkdir,
869 .rmdir = hostfs_rmdir,
870 .mknod = hostfs_mknod,
871 .rename = hostfs_rename,
872 .permission = hostfs_permission, 864 .permission = hostfs_permission,
873 .setattr = hostfs_setattr, 865 .setattr = hostfs_setattr,
874}; 866};
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 78e12b2e0ea2..546f6d39713a 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -25,7 +25,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
25 loff_t new_off = off + (whence == 1 ? filp->f_pos : 0); 25 loff_t new_off = off + (whence == 1 ? filp->f_pos : 0);
26 loff_t pos; 26 loff_t pos;
27 struct quad_buffer_head qbh; 27 struct quad_buffer_head qbh;
28 struct inode *i = filp->f_path.dentry->d_inode; 28 struct inode *i = file_inode(filp);
29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
30 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
31 31
@@ -57,7 +57,7 @@ fail:
57 57
58static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 58static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
59{ 59{
60 struct inode *inode = filp->f_path.dentry->d_inode; 60 struct inode *inode = file_inode(filp);
61 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); 61 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
62 struct quad_buffer_head qbh; 62 struct quad_buffer_head qbh;
63 struct hpfs_dirent *de; 63 struct hpfs_dirent *de;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fbfe2df5624b..9f9dbeceeee7 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -152,7 +152,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
152 retval = do_sync_write(file, buf, count, ppos); 152 retval = do_sync_write(file, buf, count, ppos);
153 if (retval > 0) { 153 if (retval > 0) {
154 hpfs_lock(file->f_path.dentry->d_sb); 154 hpfs_lock(file->f_path.dentry->d_sb);
155 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; 155 hpfs_i(file_inode(file))->i_dirty = 1;
156 hpfs_unlock(file->f_path.dentry->d_sb); 156 hpfs_unlock(file->f_path.dentry->d_sb);
157 } 157 }
158 return retval; 158 return retval;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 5dc06c837105..9edeeb0ea97e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { 147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 148 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 149 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
152 __le32 ea; 152 __le32 ea;
153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 43b315f2002b..74f55703be49 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -180,7 +180,7 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
180 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 180 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
181 ssize_t n; 181 ssize_t n;
182 182
183 read = file->f_path.dentry->d_inode->i_fop->read; 183 read = file_inode(file)->i_fop->read;
184 184
185 if (!is_user) 185 if (!is_user)
186 set_fs(KERNEL_DS); 186 set_fs(KERNEL_DS);
@@ -288,7 +288,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf,
288 struct file *proc_file = data->proc_file; 288 struct file *proc_file = data->proc_file;
289 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 289 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
290 290
291 write = proc_file->f_path.dentry->d_inode->i_fop->write; 291 write = file_inode(proc_file)->i_fop->write;
292 return (*write)(proc_file, buf, len, ppos); 292 return (*write)(proc_file, buf, len, ppos);
293} 293}
294 294
@@ -513,7 +513,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
513 loff_t (*llseek)(struct file *, loff_t, int); 513 loff_t (*llseek)(struct file *, loff_t, int);
514 loff_t ret; 514 loff_t ret;
515 515
516 llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; 516 llseek = file_inode(proc_file)->i_fop->llseek;
517 if (llseek != NULL) { 517 if (llseek != NULL) {
518 ret = (*llseek)(proc_file, off, where); 518 ret = (*llseek)(proc_file, off, where);
519 if (ret < 0) 519 if (ret < 0)
@@ -561,7 +561,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
561 }); 561 });
562 int err; 562 int err;
563 563
564 readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; 564 readdir = file_inode(proc_file)->i_fop->readdir;
565 565
566 proc_file->f_pos = file->f_pos; 566 proc_file->f_pos = file->f_pos;
567 err = (*readdir)(proc_file, &dirent, hppfs_filldir); 567 err = (*readdir)(proc_file, &dirent, hppfs_filldir);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 78bde32ea951..7f94e0cbc69c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -97,7 +97,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
97 97
98static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 98static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
99{ 99{
100 struct inode *inode = file->f_path.dentry->d_inode; 100 struct inode *inode = file_inode(file);
101 loff_t len, vma_len; 101 loff_t len, vma_len;
102 int ret; 102 int ret;
103 struct hstate *h = hstate_file(file); 103 struct hstate *h = hstate_file(file);
@@ -918,16 +918,25 @@ static int get_hstate_idx(int page_size_log)
918 return h - hstates; 918 return h - hstates;
919} 919}
920 920
921static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
922{
923 return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
924 dentry->d_name.name);
925}
926
927static struct dentry_operations anon_ops = {
928 .d_dname = hugetlb_dname
929};
930
921struct file *hugetlb_file_setup(const char *name, unsigned long addr, 931struct file *hugetlb_file_setup(const char *name, unsigned long addr,
922 size_t size, vm_flags_t acctflag, 932 size_t size, vm_flags_t acctflag,
923 struct user_struct **user, 933 struct user_struct **user,
924 int creat_flags, int page_size_log) 934 int creat_flags, int page_size_log)
925{ 935{
926 int error = -ENOMEM; 936 struct file *file = ERR_PTR(-ENOMEM);
927 struct file *file;
928 struct inode *inode; 937 struct inode *inode;
929 struct path path; 938 struct path path;
930 struct dentry *root; 939 struct super_block *sb;
931 struct qstr quick_string; 940 struct qstr quick_string;
932 struct hstate *hstate; 941 struct hstate *hstate;
933 unsigned long num_pages; 942 unsigned long num_pages;
@@ -955,17 +964,18 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
955 } 964 }
956 } 965 }
957 966
958 root = hugetlbfs_vfsmount[hstate_idx]->mnt_root; 967 sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
959 quick_string.name = name; 968 quick_string.name = name;
960 quick_string.len = strlen(quick_string.name); 969 quick_string.len = strlen(quick_string.name);
961 quick_string.hash = 0; 970 quick_string.hash = 0;
962 path.dentry = d_alloc(root, &quick_string); 971 path.dentry = d_alloc_pseudo(sb, &quick_string);
963 if (!path.dentry) 972 if (!path.dentry)
964 goto out_shm_unlock; 973 goto out_shm_unlock;
965 974
975 d_set_d_op(path.dentry, &anon_ops);
966 path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); 976 path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
967 error = -ENOSPC; 977 file = ERR_PTR(-ENOSPC);
968 inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); 978 inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
969 if (!inode) 979 if (!inode)
970 goto out_dentry; 980 goto out_dentry;
971 981
@@ -973,7 +983,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
973 size += addr & ~huge_page_mask(hstate); 983 size += addr & ~huge_page_mask(hstate);
974 num_pages = ALIGN(size, huge_page_size(hstate)) >> 984 num_pages = ALIGN(size, huge_page_size(hstate)) >>
975 huge_page_shift(hstate); 985 huge_page_shift(hstate);
976 error = -ENOMEM; 986 file = ERR_PTR(-ENOMEM);
977 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) 987 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
978 goto out_inode; 988 goto out_inode;
979 989
@@ -981,10 +991,9 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
981 inode->i_size = size; 991 inode->i_size = size;
982 clear_nlink(inode); 992 clear_nlink(inode);
983 993
984 error = -ENFILE;
985 file = alloc_file(&path, FMODE_WRITE | FMODE_READ, 994 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
986 &hugetlbfs_file_operations); 995 &hugetlbfs_file_operations);
987 if (!file) 996 if (IS_ERR(file))
988 goto out_dentry; /* inode is already attached */ 997 goto out_dentry; /* inode is already attached */
989 998
990 return file; 999 return file;
@@ -998,7 +1007,7 @@ out_shm_unlock:
998 user_shm_unlock(size, *user); 1007 user_shm_unlock(size, *user);
999 *user = NULL; 1008 *user = NULL;
1000 } 1009 }
1001 return ERR_PTR(error); 1010 return file;
1002} 1011}
1003 1012
1004static int __init init_hugetlbfs_fs(void) 1013static int __init init_hugetlbfs_fs(void)
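The hugetlbfs hunk above also switches hugetlb_file_setup() from keeping a separate "int error" that is converted with ERR_PTR() at the very end, to carrying the error inside the file pointer itself and checking alloc_file() with IS_ERR(). A minimal userspace sketch of that error-carrying convention follows; ERR_PTR/IS_ERR/PTR_ERR are reimplemented here only for illustration and setup_object() is a made-up stand-in for the real function:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Stand-in for hugetlb_file_setup(): the pending error travels in the
 * returned pointer, so no separate 'int error' local needs to be kept
 * in sync with it. */
static void *setup_object(int simulate_failure)
{
	void *obj = ERR_PTR(-ENOMEM);

	if (simulate_failure)
		return obj;
	return "a real object";
}

int main(void)
{
	void *obj = setup_object(1);

	if (IS_ERR(obj))
		printf("setup failed: %ld\n", PTR_ERR(obj));
	return 0;
}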
diff --git a/fs/inode.c b/fs/inode.c
index 14084b72b259..f5f7c06c36fb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb,
798 int (*test)(struct inode *, void *), 798 int (*test)(struct inode *, void *),
799 void *data) 799 void *data)
800{ 800{
801 struct hlist_node *node;
802 struct inode *inode = NULL; 801 struct inode *inode = NULL;
803 802
804repeat: 803repeat:
805 hlist_for_each_entry(inode, node, head, i_hash) { 804 hlist_for_each_entry(inode, head, i_hash) {
806 spin_lock(&inode->i_lock); 805 spin_lock(&inode->i_lock);
807 if (inode->i_sb != sb) { 806 if (inode->i_sb != sb) {
808 spin_unlock(&inode->i_lock); 807 spin_unlock(&inode->i_lock);
@@ -830,11 +829,10 @@ repeat:
830static struct inode *find_inode_fast(struct super_block *sb, 829static struct inode *find_inode_fast(struct super_block *sb,
831 struct hlist_head *head, unsigned long ino) 830 struct hlist_head *head, unsigned long ino)
832{ 831{
833 struct hlist_node *node;
834 struct inode *inode = NULL; 832 struct inode *inode = NULL;
835 833
836repeat: 834repeat:
837 hlist_for_each_entry(inode, node, head, i_hash) { 835 hlist_for_each_entry(inode, head, i_hash) {
838 spin_lock(&inode->i_lock); 836 spin_lock(&inode->i_lock);
839 if (inode->i_ino != ino) { 837 if (inode->i_ino != ino) {
840 spin_unlock(&inode->i_lock); 838 spin_unlock(&inode->i_lock);
@@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked);
1132static int test_inode_iunique(struct super_block *sb, unsigned long ino) 1130static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1133{ 1131{
1134 struct hlist_head *b = inode_hashtable + hash(sb, ino); 1132 struct hlist_head *b = inode_hashtable + hash(sb, ino);
1135 struct hlist_node *node;
1136 struct inode *inode; 1133 struct inode *inode;
1137 1134
1138 spin_lock(&inode_hash_lock); 1135 spin_lock(&inode_hash_lock);
1139 hlist_for_each_entry(inode, node, b, i_hash) { 1136 hlist_for_each_entry(inode, b, i_hash) {
1140 if (inode->i_ino == ino && inode->i_sb == sb) { 1137 if (inode->i_ino == ino && inode->i_sb == sb) {
1141 spin_unlock(&inode_hash_lock); 1138 spin_unlock(&inode_hash_lock);
1142 return 0; 1139 return 0;
@@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode)
1291 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1288 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1292 1289
1293 while (1) { 1290 while (1) {
1294 struct hlist_node *node;
1295 struct inode *old = NULL; 1291 struct inode *old = NULL;
1296 spin_lock(&inode_hash_lock); 1292 spin_lock(&inode_hash_lock);
1297 hlist_for_each_entry(old, node, head, i_hash) { 1293 hlist_for_each_entry(old, head, i_hash) {
1298 if (old->i_ino != ino) 1294 if (old->i_ino != ino)
1299 continue; 1295 continue;
1300 if (old->i_sb != sb) 1296 if (old->i_sb != sb)
@@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode)
1306 } 1302 }
1307 break; 1303 break;
1308 } 1304 }
1309 if (likely(!node)) { 1305 if (likely(!old)) {
1310 spin_lock(&inode->i_lock); 1306 spin_lock(&inode->i_lock);
1311 inode->i_state |= I_NEW; 1307 inode->i_state |= I_NEW;
1312 hlist_add_head(&inode->i_hash, head); 1308 hlist_add_head(&inode->i_hash, head);
@@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1334 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1330 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1335 1331
1336 while (1) { 1332 while (1) {
1337 struct hlist_node *node;
1338 struct inode *old = NULL; 1333 struct inode *old = NULL;
1339 1334
1340 spin_lock(&inode_hash_lock); 1335 spin_lock(&inode_hash_lock);
1341 hlist_for_each_entry(old, node, head, i_hash) { 1336 hlist_for_each_entry(old, head, i_hash) {
1342 if (old->i_sb != sb) 1337 if (old->i_sb != sb)
1343 continue; 1338 continue;
1344 if (!test(old, data)) 1339 if (!test(old, data))
@@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1350 } 1345 }
1351 break; 1346 break;
1352 } 1347 }
1353 if (likely(!node)) { 1348 if (likely(!old)) {
1354 spin_lock(&inode->i_lock); 1349 spin_lock(&inode->i_lock);
1355 inode->i_state |= I_NEW; 1350 inode->i_state |= I_NEW;
1356 hlist_add_head(&inode->i_hash, head); 1351 hlist_add_head(&inode->i_hash, head);
@@ -1655,7 +1650,7 @@ EXPORT_SYMBOL(file_remove_suid);
1655 1650
1656int file_update_time(struct file *file) 1651int file_update_time(struct file *file)
1657{ 1652{
1658 struct inode *inode = file->f_path.dentry->d_inode; 1653 struct inode *inode = file_inode(file);
1659 struct timespec now; 1654 struct timespec now;
1660 int sync_it = 0; 1655 int sync_it = 0;
1661 int ret; 1656 int ret;
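The fs/inode.c hunks above all follow the same shape: the separate struct hlist_node cursor is dropped, the loop iterates over the typed entries directly, and the "nothing found" test moves from 'if (likely(!node))' to 'if (likely(!old))'. That works because the typed cursor is left NULL when the loop runs to completion. A simplified, runnable illustration — using a plain singly linked list and an invented for_each_entry macro rather than the kernel's hlist machinery — is:

#include <stdio.h>
#include <stddef.h>

struct entry { unsigned long ino; struct entry *next; };

/* Illustrative macro: the cursor is the typed entry itself and ends up
 * NULL when the loop terminates without a break. */
#define for_each_entry(pos, head) \
	for ((pos) = (head); (pos) != NULL; (pos) = (pos)->next)

int main(void)
{
	struct entry c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct entry *found = NULL;

	for_each_entry(found, &a)
		if (found->ino == 42)
			break;

	if (!found)	/* mirrors the 'if (likely(!old))' checks in the patch */
		printf("no matching entry\n");
	return 0;
}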
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 3bdad6d1f268..fd507fb460f8 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -175,7 +175,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
175 struct fiemap fiemap; 175 struct fiemap fiemap;
176 struct fiemap __user *ufiemap = (struct fiemap __user *) arg; 176 struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
177 struct fiemap_extent_info fieinfo = { 0, }; 177 struct fiemap_extent_info fieinfo = { 0, };
178 struct inode *inode = filp->f_path.dentry->d_inode; 178 struct inode *inode = file_inode(filp);
179 struct super_block *sb = inode->i_sb; 179 struct super_block *sb = inode->i_sb;
180 u64 len; 180 u64 len;
181 int error; 181 int error;
@@ -424,7 +424,7 @@ EXPORT_SYMBOL(generic_block_fiemap);
424 */ 424 */
425int ioctl_preallocate(struct file *filp, void __user *argp) 425int ioctl_preallocate(struct file *filp, void __user *argp)
426{ 426{
427 struct inode *inode = filp->f_path.dentry->d_inode; 427 struct inode *inode = file_inode(filp);
428 struct space_resv sr; 428 struct space_resv sr;
429 429
430 if (copy_from_user(&sr, argp, sizeof(sr))) 430 if (copy_from_user(&sr, argp, sizeof(sr)))
@@ -449,7 +449,7 @@ int ioctl_preallocate(struct file *filp, void __user *argp)
449static int file_ioctl(struct file *filp, unsigned int cmd, 449static int file_ioctl(struct file *filp, unsigned int cmd,
450 unsigned long arg) 450 unsigned long arg)
451{ 451{
452 struct inode *inode = filp->f_path.dentry->d_inode; 452 struct inode *inode = file_inode(filp);
453 int __user *p = (int __user *)arg; 453 int __user *p = (int __user *)arg;
454 454
455 switch (cmd) { 455 switch (cmd) {
@@ -512,7 +512,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
512 512
513static int ioctl_fsfreeze(struct file *filp) 513static int ioctl_fsfreeze(struct file *filp)
514{ 514{
515 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; 515 struct super_block *sb = file_inode(filp)->i_sb;
516 516
517 if (!capable(CAP_SYS_ADMIN)) 517 if (!capable(CAP_SYS_ADMIN))
518 return -EPERM; 518 return -EPERM;
@@ -527,7 +527,7 @@ static int ioctl_fsfreeze(struct file *filp)
527 527
528static int ioctl_fsthaw(struct file *filp) 528static int ioctl_fsthaw(struct file *filp)
529{ 529{
530 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; 530 struct super_block *sb = file_inode(filp)->i_sb;
531 531
532 if (!capable(CAP_SYS_ADMIN)) 532 if (!capable(CAP_SYS_ADMIN))
533 return -EPERM; 533 return -EPERM;
@@ -548,7 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
548{ 548{
549 int error = 0; 549 int error = 0;
550 int __user *argp = (int __user *)arg; 550 int __user *argp = (int __user *)arg;
551 struct inode *inode = filp->f_path.dentry->d_inode; 551 struct inode *inode = file_inode(filp);
552 552
553 switch (cmd) { 553 switch (cmd) {
554 case FIOCLEX: 554 case FIOCLEX:
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 0b3fa7974fa8..592e5115a561 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -296,7 +296,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
296 */ 296 */
297static int zisofs_readpage(struct file *file, struct page *page) 297static int zisofs_readpage(struct file *file, struct page *page)
298{ 298{
299 struct inode *inode = file->f_path.dentry->d_inode; 299 struct inode *inode = file_inode(file);
300 struct address_space *mapping = inode->i_mapping; 300 struct address_space *mapping = inode->i_mapping;
301 int err; 301 int err;
302 int i, pcount, full_page; 302 int i, pcount, full_page;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index f20437c068a0..a7d5c3c3d4e6 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -253,7 +253,7 @@ static int isofs_readdir(struct file *filp,
253 int result; 253 int result;
254 char *tmpname; 254 char *tmpname;
255 struct iso_directory_record *tmpde; 255 struct iso_directory_record *tmpde;
256 struct inode *inode = filp->f_path.dentry->d_inode; 256 struct inode *inode = file_inode(filp);
257 257
258 tmpname = (char *)__get_free_page(GFP_KERNEL); 258 tmpname = (char *)__get_free_page(GFP_KERNEL);
259 if (tmpname == NULL) 259 if (tmpname == NULL)
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 2b4f2358eadb..12088d8de3fa 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -125,10 +125,10 @@ isofs_export_encode_fh(struct inode *inode,
125 */ 125 */
126 if (parent && (len < 5)) { 126 if (parent && (len < 5)) {
127 *max_len = 5; 127 *max_len = 5;
128 return 255; 128 return FILEID_INVALID;
129 } else if (len < 3) { 129 } else if (len < 3) {
130 *max_len = 3; 130 *max_len = 3;
131 return 255; 131 return FILEID_INVALID;
132 } 132 }
133 133
134 len = 3; 134 len = 3;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a2862339323b..81cc7eaff863 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -446,7 +446,8 @@ int __log_start_commit(journal_t *journal, tid_t target)
446 * currently running transaction (if it exists). Otherwise, 446 * currently running transaction (if it exists). Otherwise,
447 * the target tid must be an old one. 447 * the target tid must be an old one.
448 */ 448 */
449 if (journal->j_running_transaction && 449 if (journal->j_commit_request != target &&
450 journal->j_running_transaction &&
450 journal->j_running_transaction->t_tid == target) { 451 journal->j_running_transaction->t_tid == target) {
451 /* 452 /*
452 * We want a new commit: OK, mark the request and wakeup the 453 * We want a new commit: OK, mark the request and wakeup the
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3091d42992f0..750c70148eff 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -435,7 +435,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
435 435
436 trace_jbd2_commit_locking(journal, commit_transaction); 436 trace_jbd2_commit_locking(journal, commit_transaction);
437 stats.run.rs_wait = commit_transaction->t_max_wait; 437 stats.run.rs_wait = commit_transaction->t_max_wait;
438 stats.run.rs_request_delay = 0;
438 stats.run.rs_locked = jiffies; 439 stats.run.rs_locked = jiffies;
440 if (commit_transaction->t_requested)
441 stats.run.rs_request_delay =
442 jbd2_time_diff(commit_transaction->t_requested,
443 stats.run.rs_locked);
439 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 444 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
440 stats.run.rs_locked); 445 stats.run.rs_locked);
441 446
@@ -1116,7 +1121,10 @@ restart_loop:
1116 */ 1121 */
1117 spin_lock(&journal->j_history_lock); 1122 spin_lock(&journal->j_history_lock);
1118 journal->j_stats.ts_tid++; 1123 journal->j_stats.ts_tid++;
1124 if (commit_transaction->t_requested)
1125 journal->j_stats.ts_requested++;
1119 journal->j_stats.run.rs_wait += stats.run.rs_wait; 1126 journal->j_stats.run.rs_wait += stats.run.rs_wait;
1127 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
1120 journal->j_stats.run.rs_running += stats.run.rs_running; 1128 journal->j_stats.run.rs_running += stats.run.rs_running;
1121 journal->j_stats.run.rs_locked += stats.run.rs_locked; 1129 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1122 journal->j_stats.run.rs_flushing += stats.run.rs_flushing; 1130 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
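The jbd2/commit.c hunk records a per-transaction request delay only when the transaction was explicitly requested (t_requested set) and counts such transactions separately, so that the averaged figure printed later can be guarded against dividing by zero. A small userspace sketch of that bookkeeping, with invented names standing in for the jbd2 fields:

#include <stdio.h>

struct commit_stats {
	unsigned long ts_tid;           /* transactions committed           */
	unsigned long ts_requested;     /* ... that had an explicit request */
	unsigned long rs_request_delay; /* summed request->lock delay       */
};

static void account_commit(struct commit_stats *s, unsigned long requested_at,
			   unsigned long locked_at)
{
	s->ts_tid++;
	if (requested_at) {		/* mirrors 'if (t_requested)' above */
		s->ts_requested++;
		s->rs_request_delay += locked_at - requested_at;
	}
}

int main(void)
{
	struct commit_stats s = { 0, 0, 0 };

	account_commit(&s, 100, 130);	/* explicitly requested commit      */
	account_commit(&s, 0, 250);	/* timer-driven commit, no request  */

	printf("average request delay: %lu ticks\n",
	       s.ts_requested ? s.rs_request_delay / s.ts_requested : 0);
	return 0;
}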
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index dbf41f9452db..ed10991ab006 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -35,7 +35,6 @@
35#include <linux/kthread.h> 35#include <linux/kthread.h>
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 38#include <linux/seq_file.h>
40#include <linux/math64.h> 39#include <linux/math64.h>
41#include <linux/hash.h> 40#include <linux/hash.h>
@@ -51,6 +50,14 @@
51#include <asm/uaccess.h> 50#include <asm/uaccess.h>
52#include <asm/page.h> 51#include <asm/page.h>
53 52
53#ifdef CONFIG_JBD2_DEBUG
54ushort jbd2_journal_enable_debug __read_mostly;
55EXPORT_SYMBOL(jbd2_journal_enable_debug);
56
57module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
58MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
59#endif
60
54EXPORT_SYMBOL(jbd2_journal_extend); 61EXPORT_SYMBOL(jbd2_journal_extend);
55EXPORT_SYMBOL(jbd2_journal_stop); 62EXPORT_SYMBOL(jbd2_journal_stop);
56EXPORT_SYMBOL(jbd2_journal_lock_updates); 63EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -513,6 +520,10 @@ int __jbd2_log_space_left(journal_t *journal)
513 */ 520 */
514int __jbd2_log_start_commit(journal_t *journal, tid_t target) 521int __jbd2_log_start_commit(journal_t *journal, tid_t target)
515{ 522{
523 /* Return if the txn has already requested to be committed */
524 if (journal->j_commit_request == target)
525 return 0;
526
516 /* 527 /*
517 * The only transaction we can possibly wait upon is the 528 * The only transaction we can possibly wait upon is the
518 * currently running transaction (if it exists). Otherwise, 529 * currently running transaction (if it exists). Otherwise,
@@ -529,6 +540,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
529 jbd_debug(1, "JBD2: requesting commit %d/%d\n", 540 jbd_debug(1, "JBD2: requesting commit %d/%d\n",
530 journal->j_commit_request, 541 journal->j_commit_request,
531 journal->j_commit_sequence); 542 journal->j_commit_sequence);
543 journal->j_running_transaction->t_requested = jiffies;
532 wake_up(&journal->j_wait_commit); 544 wake_up(&journal->j_wait_commit);
533 return 1; 545 return 1;
534 } else if (!tid_geq(journal->j_commit_request, target)) 546 } else if (!tid_geq(journal->j_commit_request, target))
@@ -894,13 +906,18 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
894 906
895 if (v != SEQ_START_TOKEN) 907 if (v != SEQ_START_TOKEN)
896 return 0; 908 return 0;
897 seq_printf(seq, "%lu transaction, each up to %u blocks\n", 909 seq_printf(seq, "%lu transactions (%lu requested), "
898 s->stats->ts_tid, 910 "each up to %u blocks\n",
899 s->journal->j_max_transaction_buffers); 911 s->stats->ts_tid, s->stats->ts_requested,
912 s->journal->j_max_transaction_buffers);
900 if (s->stats->ts_tid == 0) 913 if (s->stats->ts_tid == 0)
901 return 0; 914 return 0;
902 seq_printf(seq, "average: \n %ums waiting for transaction\n", 915 seq_printf(seq, "average: \n %ums waiting for transaction\n",
903 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 916 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
917 seq_printf(seq, " %ums request delay\n",
918 (s->stats->ts_requested == 0) ? 0 :
919 jiffies_to_msecs(s->stats->run.rs_request_delay /
920 s->stats->ts_requested));
904 seq_printf(seq, " %ums running transaction\n", 921 seq_printf(seq, " %ums running transaction\n",
905 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 922 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
906 seq_printf(seq, " %ums transaction was being locked\n", 923 seq_printf(seq, " %ums transaction was being locked\n",
@@ -2485,45 +2502,6 @@ restart:
2485 spin_unlock(&journal->j_list_lock); 2502 spin_unlock(&journal->j_list_lock);
2486} 2503}
2487 2504
2488/*
2489 * debugfs tunables
2490 */
2491#ifdef CONFIG_JBD2_DEBUG
2492u8 jbd2_journal_enable_debug __read_mostly;
2493EXPORT_SYMBOL(jbd2_journal_enable_debug);
2494
2495#define JBD2_DEBUG_NAME "jbd2-debug"
2496
2497static struct dentry *jbd2_debugfs_dir;
2498static struct dentry *jbd2_debug;
2499
2500static void __init jbd2_create_debugfs_entry(void)
2501{
2502 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
2503 if (jbd2_debugfs_dir)
2504 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
2505 S_IRUGO | S_IWUSR,
2506 jbd2_debugfs_dir,
2507 &jbd2_journal_enable_debug);
2508}
2509
2510static void __exit jbd2_remove_debugfs_entry(void)
2511{
2512 debugfs_remove(jbd2_debug);
2513 debugfs_remove(jbd2_debugfs_dir);
2514}
2515
2516#else
2517
2518static void __init jbd2_create_debugfs_entry(void)
2519{
2520}
2521
2522static void __exit jbd2_remove_debugfs_entry(void)
2523{
2524}
2525
2526#endif
2527 2505
2528#ifdef CONFIG_PROC_FS 2506#ifdef CONFIG_PROC_FS
2529 2507
@@ -2609,7 +2587,6 @@ static int __init journal_init(void)
2609 2587
2610 ret = journal_init_caches(); 2588 ret = journal_init_caches();
2611 if (ret == 0) { 2589 if (ret == 0) {
2612 jbd2_create_debugfs_entry();
2613 jbd2_create_jbd_stats_proc_entry(); 2590 jbd2_create_jbd_stats_proc_entry();
2614 } else { 2591 } else {
2615 jbd2_journal_destroy_caches(); 2592 jbd2_journal_destroy_caches();
@@ -2624,7 +2601,6 @@ static void __exit journal_exit(void)
2624 if (n) 2601 if (n)
2625 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); 2602 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
2626#endif 2603#endif
2627 jbd2_remove_debugfs_entry();
2628 jbd2_remove_jbd_stats_proc_entry(); 2604 jbd2_remove_jbd_stats_proc_entry();
2629 jbd2_journal_destroy_caches(); 2605 jbd2_journal_destroy_caches();
2630} 2606}
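The jbd2/journal.c changes above drop the debugfs tunable and expose the debugging level as a module parameter instead. As a generic, hedged illustration of that style — this is a stand-alone skeleton with made-up names, not the jbd2 code — a module parameter whose externally visible name differs from the variable looks like:

#include <linux/module.h>
#include <linux/moduleparam.h>

static ushort demo_debug_level;

/* Typically surfaces under /sys/module/<module>/parameters/demo_debug and
 * can be set at load time, e.g. "modprobe <module> demo_debug=2". */
module_param_named(demo_debug, demo_debug_level, ushort, 0644);
MODULE_PARM_DESC(demo_debug, "Debugging level (0 = off)");

static int __init demo_init(void)
{
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Compared with the removed debugfs file, this needs no create/remove hooks in the init and exit paths, which is exactly what the journal_init()/journal_exit() hunks delete.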
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index df9f29760efa..b7e2385c6e92 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,8 @@
30#include <linux/bug.h> 30#include <linux/bug.h>
31#include <linux/module.h> 31#include <linux/module.h>
32 32
33#include <trace/events/jbd2.h>
34
33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 35static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
34static void __jbd2_journal_unfile_buffer(struct journal_head *jh); 36static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
35 37
@@ -100,6 +102,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
100 journal->j_running_transaction = transaction; 102 journal->j_running_transaction = transaction;
101 transaction->t_max_wait = 0; 103 transaction->t_max_wait = 0;
102 transaction->t_start = jiffies; 104 transaction->t_start = jiffies;
105 transaction->t_requested = 0;
103 106
104 return transaction; 107 return transaction;
105} 108}
@@ -306,6 +309,8 @@ repeat:
306 */ 309 */
307 update_t_max_wait(transaction, ts); 310 update_t_max_wait(transaction, ts);
308 handle->h_transaction = transaction; 311 handle->h_transaction = transaction;
312 handle->h_requested_credits = nblocks;
313 handle->h_start_jiffies = jiffies;
309 atomic_inc(&transaction->t_updates); 314 atomic_inc(&transaction->t_updates);
310 atomic_inc(&transaction->t_handle_count); 315 atomic_inc(&transaction->t_handle_count);
311 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", 316 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
@@ -352,7 +357,8 @@ static handle_t *new_handle(int nblocks)
352 * Return a pointer to a newly allocated handle, or an ERR_PTR() value 357 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
353 * on failure. 358 * on failure.
354 */ 359 */
355handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) 360handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
361 unsigned int type, unsigned int line_no)
356{ 362{
357 handle_t *handle = journal_current_handle(); 363 handle_t *handle = journal_current_handle();
358 int err; 364 int err;
@@ -378,6 +384,11 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
378 current->journal_info = NULL; 384 current->journal_info = NULL;
379 handle = ERR_PTR(err); 385 handle = ERR_PTR(err);
380 } 386 }
387 handle->h_type = type;
388 handle->h_line_no = line_no;
389 trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
390 handle->h_transaction->t_tid, type,
391 line_no, nblocks);
381 return handle; 392 return handle;
382} 393}
383EXPORT_SYMBOL(jbd2__journal_start); 394EXPORT_SYMBOL(jbd2__journal_start);
@@ -385,7 +396,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
385 396
386handle_t *jbd2_journal_start(journal_t *journal, int nblocks) 397handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
387{ 398{
388 return jbd2__journal_start(journal, nblocks, GFP_NOFS); 399 return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0);
389} 400}
390EXPORT_SYMBOL(jbd2_journal_start); 401EXPORT_SYMBOL(jbd2_journal_start);
391 402
@@ -447,7 +458,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
447 goto unlock; 458 goto unlock;
448 } 459 }
449 460
461 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
462 handle->h_transaction->t_tid,
463 handle->h_type, handle->h_line_no,
464 handle->h_buffer_credits,
465 nblocks);
466
450 handle->h_buffer_credits += nblocks; 467 handle->h_buffer_credits += nblocks;
468 handle->h_requested_credits += nblocks;
451 atomic_add(nblocks, &transaction->t_outstanding_credits); 469 atomic_add(nblocks, &transaction->t_outstanding_credits);
452 result = 0; 470 result = 0;
453 471
@@ -1376,6 +1394,13 @@ int jbd2_journal_stop(handle_t *handle)
1376 } 1394 }
1377 1395
1378 jbd_debug(4, "Handle %p going down\n", handle); 1396 jbd_debug(4, "Handle %p going down\n", handle);
1397 trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1398 handle->h_transaction->t_tid,
1399 handle->h_type, handle->h_line_no,
1400 jiffies - handle->h_start_jiffies,
1401 handle->h_sync, handle->h_requested_credits,
1402 (handle->h_requested_credits -
1403 handle->h_buffer_credits));
1379 1404
1380 /* 1405 /*
1381 * Implement synchronous transaction batching. If the handle 1406 * Implement synchronous transaction batching. If the handle
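The transaction.c hunk extends jbd2__journal_start() with caller-identification arguments (type and line number) that feed the new tracepoints, while jbd2_journal_start() stays as a thin wrapper passing zeros so existing callers keep working. A minimal userspace sketch of that "annotated entry point plus compatibility wrapper" pattern, with invented names and a printf standing in for the tracepoint:

#include <stdio.h>

struct handle {
	unsigned int type;	/* which subsystem started the handle */
	unsigned int line_no;	/* source line of the caller          */
	int          credits;
};

/* Extended entry point: callers that care pass identification that a
 * tracepoint could record. */
static struct handle *journal_start_annotated(int nblocks, unsigned int type,
					       unsigned int line_no)
{
	static struct handle h;

	h.type = type;
	h.line_no = line_no;
	h.credits = nblocks;
	printf("handle: %d credits, type %u, line %u\n", nblocks, type, line_no);
	return &h;
}

/* Thin compatibility wrapper, mirroring jbd2_journal_start() above:
 * legacy callers keep working and simply show up as type 0, line 0. */
static struct handle *journal_start(int nblocks)
{
	return journal_start_annotated(nblocks, 0, 0);
}

int main(void)
{
	journal_start(8);
	return 0;
}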
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 6ae169cd8faa..d8bb6c411e96 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -50,8 +50,8 @@ config JFFS2_FS_WBUF_VERIFY
50 write-buffer, and check for errors. 50 write-buffer, and check for errors.
51 51
52config JFFS2_SUMMARY 52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)" 53 bool "JFFS2 summary support"
54 depends on JFFS2_FS && EXPERIMENTAL 54 depends on JFFS2_FS
55 default n 55 default n
56 help 56 help
57 This feature makes it possible to use summary information 57 This feature makes it possible to use summary information
@@ -63,8 +63,8 @@ config JFFS2_SUMMARY
63 If unsure, say 'N'. 63 If unsure, say 'N'.
64 64
65config JFFS2_FS_XATTR 65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)" 66 bool "JFFS2 XATTR support"
67 depends on JFFS2_FS && EXPERIMENTAL 67 depends on JFFS2_FS
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
@@ -173,7 +173,7 @@ config JFFS2_CMODE_PRIORITY
173 successful one. 173 successful one.
174 174
175config JFFS2_CMODE_SIZE 175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)" 176 bool "size"
177 help 177 help
178 Tries all compressors and chooses the one which has the smallest 178 Tries all compressors and chooses the one which has the smallest
179 result. 179 result.
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ad7774d32095..acd46a4160cb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -117,12 +117,12 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
117static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) 117static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
118{ 118{
119 struct jffs2_inode_info *f; 119 struct jffs2_inode_info *f;
120 struct inode *inode = filp->f_path.dentry->d_inode; 120 struct inode *inode = file_inode(filp);
121 struct jffs2_full_dirent *fd; 121 struct jffs2_full_dirent *fd;
122 unsigned long offset, curofs; 122 unsigned long offset, curofs;
123 123
124 jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", 124 jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n",
125 filp->f_path.dentry->d_inode->i_ino); 125 file_inode(filp)->i_ino);
126 126
127 f = JFFS2_INODE_INFO(inode); 127 f = JFFS2_INODE_INFO(inode);
128 128
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index bc555ff417e9..93a1232894f6 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -58,7 +58,7 @@ static long jfs_map_ext2(unsigned long flags, int from)
58 58
59long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 59long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
60{ 60{
61 struct inode *inode = filp->f_dentry->d_inode; 61 struct inode *inode = file_inode(filp);
62 struct jfs_inode_info *jfs_inode = JFS_IP(inode); 62 struct jfs_inode_info *jfs_inode = JFS_IP(inode);
63 unsigned int flags; 63 unsigned int flags;
64 64
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 9197a1b0d02d..0ddbeceafc62 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3004,7 +3004,7 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
3004 */ 3004 */
3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3006{ 3006{
3007 struct inode *ip = filp->f_path.dentry->d_inode; 3007 struct inode *ip = file_inode(filp);
3008 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 3008 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009 int rc = 0; 3009 int rc = 0;
3010 loff_t dtpos; /* legacy OS/2 style position */ 3010 loff_t dtpos; /* legacy OS/2 style position */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1a543be09c79..060ba638becb 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -154,7 +154,7 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
154 /* 154 /*
155 * If we really return the number of allocated & free inodes, some 155 * If we really return the number of allocated & free inodes, some
156 * applications will fail because they won't see enough free inodes. 156 * applications will fail because they won't see enough free inodes.
157 * We'll try to calculate some guess as to how may inodes we can 157 * We'll try to calculate some guess as to how many inodes we can
158 * really allocate 158 * really allocate
159 * 159 *
160 * buf->f_files = atomic_read(&imap->im_numinos); 160 * buf->f_files = atomic_read(&imap->im_numinos);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 6cd673d34fb9..0796c45d0d4d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -178,7 +178,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
178 continue; 178 continue;
179 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) 179 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
180 continue; 180 continue;
181 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 181 if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
182 continue; 182 continue;
183 /* Alright, we found a lock. Set the return status 183 /* Alright, we found a lock. Set the return status
184 * and wake up the caller 184 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b43114c4332a..7e529c3c45c0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -127,7 +127,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
127 struct nlm_lock *lock = &argp->lock; 127 struct nlm_lock *lock = &argp->lock;
128 128
129 nlmclnt_next_cookie(&argp->cookie); 129 nlmclnt_next_cookie(&argp->cookie);
130 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); 130 memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
131 lock->caller = utsname()->nodename; 131 lock->caller = utsname()->nodename;
132 lock->oh.data = req->a_owner; 132 lock->oh.data = req->a_owner;
133 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", 133 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
@@ -550,6 +550,9 @@ again:
550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
551 if (status < 0) 551 if (status < 0)
552 break; 552 break;
553 /* Resend the blocking lock request after a server reboot */
554 if (resp->status == nlm_lck_denied_grace_period)
555 continue;
553 if (resp->status != nlm_lck_blocked) 556 if (resp->status != nlm_lck_blocked)
554 break; 557 break;
555 } 558 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 764c4d2ed804..969d589c848d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -33,15 +33,15 @@
33static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH]; 33static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
34static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; 34static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
35 35
36#define for_each_host(host, pos, chain, table) \ 36#define for_each_host(host, chain, table) \
37 for ((chain) = (table); \ 37 for ((chain) = (table); \
38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ 38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
39 hlist_for_each_entry((host), (pos), (chain), h_hash) 39 hlist_for_each_entry((host), (chain), h_hash)
40 40
41#define for_each_host_safe(host, pos, next, chain, table) \ 41#define for_each_host_safe(host, next, chain, table) \
42 for ((chain) = (table); \ 42 for ((chain) = (table); \
43 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ 43 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
44 hlist_for_each_entry_safe((host), (pos), (next), \ 44 hlist_for_each_entry_safe((host), (next), \
45 (chain), h_hash) 45 (chain), h_hash)
46 46
47static unsigned long nrhosts; 47static unsigned long nrhosts;
@@ -226,7 +226,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
226 .net = net, 226 .net = net,
227 }; 227 };
228 struct hlist_head *chain; 228 struct hlist_head *chain;
229 struct hlist_node *pos;
230 struct nlm_host *host; 229 struct nlm_host *host;
231 struct nsm_handle *nsm = NULL; 230 struct nsm_handle *nsm = NULL;
232 struct lockd_net *ln = net_generic(net, lockd_net_id); 231 struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -238,7 +237,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
238 mutex_lock(&nlm_host_mutex); 237 mutex_lock(&nlm_host_mutex);
239 238
240 chain = &nlm_client_hosts[nlm_hash_address(sap)]; 239 chain = &nlm_client_hosts[nlm_hash_address(sap)];
241 hlist_for_each_entry(host, pos, chain, h_hash) { 240 hlist_for_each_entry(host, chain, h_hash) {
242 if (host->net != net) 241 if (host->net != net)
243 continue; 242 continue;
244 if (!rpc_cmp_addr(nlm_addr(host), sap)) 243 if (!rpc_cmp_addr(nlm_addr(host), sap))
@@ -323,7 +322,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
323 const size_t hostname_len) 322 const size_t hostname_len)
324{ 323{
325 struct hlist_head *chain; 324 struct hlist_head *chain;
326 struct hlist_node *pos;
327 struct nlm_host *host = NULL; 325 struct nlm_host *host = NULL;
328 struct nsm_handle *nsm = NULL; 326 struct nsm_handle *nsm = NULL;
329 struct sockaddr *src_sap = svc_daddr(rqstp); 327 struct sockaddr *src_sap = svc_daddr(rqstp);
@@ -351,7 +349,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
351 nlm_gc_hosts(net); 349 nlm_gc_hosts(net);
352 350
353 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; 351 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
354 hlist_for_each_entry(host, pos, chain, h_hash) { 352 hlist_for_each_entry(host, chain, h_hash) {
355 if (host->net != net) 353 if (host->net != net)
356 continue; 354 continue;
357 if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) 355 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
@@ -516,10 +514,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
516{ 514{
517 struct nlm_host *host; 515 struct nlm_host *host;
518 struct hlist_head *chain; 516 struct hlist_head *chain;
519 struct hlist_node *pos;
520 517
521 mutex_lock(&nlm_host_mutex); 518 mutex_lock(&nlm_host_mutex);
522 for_each_host(host, pos, chain, cache) { 519 for_each_host(host, chain, cache) {
523 if (host->h_nsmhandle == nsm 520 if (host->h_nsmhandle == nsm
524 && host->h_nsmstate != info->state) { 521 && host->h_nsmstate != info->state) {
525 host->h_nsmstate = info->state; 522 host->h_nsmstate = info->state;
@@ -571,7 +568,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
571static void nlm_complain_hosts(struct net *net) 568static void nlm_complain_hosts(struct net *net)
572{ 569{
573 struct hlist_head *chain; 570 struct hlist_head *chain;
574 struct hlist_node *pos;
575 struct nlm_host *host; 571 struct nlm_host *host;
576 572
577 if (net) { 573 if (net) {
@@ -588,7 +584,7 @@ static void nlm_complain_hosts(struct net *net)
588 dprintk("lockd: %lu hosts left:\n", nrhosts); 584 dprintk("lockd: %lu hosts left:\n", nrhosts);
589 } 585 }
590 586
591 for_each_host(host, pos, chain, nlm_server_hosts) { 587 for_each_host(host, chain, nlm_server_hosts) {
592 if (net && host->net != net) 588 if (net && host->net != net)
593 continue; 589 continue;
594 dprintk(" %s (cnt %d use %d exp %ld net %p)\n", 590 dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
@@ -601,14 +597,13 @@ void
601nlm_shutdown_hosts_net(struct net *net) 597nlm_shutdown_hosts_net(struct net *net)
602{ 598{
603 struct hlist_head *chain; 599 struct hlist_head *chain;
604 struct hlist_node *pos;
605 struct nlm_host *host; 600 struct nlm_host *host;
606 601
607 mutex_lock(&nlm_host_mutex); 602 mutex_lock(&nlm_host_mutex);
608 603
609 /* First, make all hosts eligible for gc */ 604 /* First, make all hosts eligible for gc */
610 dprintk("lockd: nuking all hosts in net %p...\n", net); 605 dprintk("lockd: nuking all hosts in net %p...\n", net);
611 for_each_host(host, pos, chain, nlm_server_hosts) { 606 for_each_host(host, chain, nlm_server_hosts) {
612 if (net && host->net != net) 607 if (net && host->net != net)
613 continue; 608 continue;
614 host->h_expires = jiffies - 1; 609 host->h_expires = jiffies - 1;
@@ -645,11 +640,11 @@ static void
645nlm_gc_hosts(struct net *net) 640nlm_gc_hosts(struct net *net)
646{ 641{
647 struct hlist_head *chain; 642 struct hlist_head *chain;
648 struct hlist_node *pos, *next; 643 struct hlist_node *next;
649 struct nlm_host *host; 644 struct nlm_host *host;
650 645
651 dprintk("lockd: host garbage collection for net %p\n", net); 646 dprintk("lockd: host garbage collection for net %p\n", net);
652 for_each_host(host, pos, chain, nlm_server_hosts) { 647 for_each_host(host, chain, nlm_server_hosts) {
653 if (net && host->net != net) 648 if (net && host->net != net)
654 continue; 649 continue;
655 host->h_inuse = 0; 650 host->h_inuse = 0;
@@ -658,7 +653,7 @@ nlm_gc_hosts(struct net *net)
658 /* Mark all hosts that hold locks, blocks or shares */ 653 /* Mark all hosts that hold locks, blocks or shares */
659 nlmsvc_mark_resources(net); 654 nlmsvc_mark_resources(net);
660 655
661 for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { 656 for_each_host_safe(host, next, chain, nlm_server_hosts) {
662 if (net && host->net != net) 657 if (net && host->net != net)
663 continue; 658 continue;
664 if (atomic_read(&host->h_count) || host->h_inuse 659 if (atomic_read(&host->h_count) || host->h_inuse
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 8d80c990dffd..e703318c41df 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -406,8 +406,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
406 __be32 ret; 406 __be32 ret;
407 407
408 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", 408 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
409 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 409 file_inode(file->f_file)->i_sb->s_id,
410 file->f_file->f_path.dentry->d_inode->i_ino, 410 file_inode(file->f_file)->i_ino,
411 lock->fl.fl_type, lock->fl.fl_pid, 411 lock->fl.fl_type, lock->fl.fl_pid,
412 (long long)lock->fl.fl_start, 412 (long long)lock->fl.fl_start,
413 (long long)lock->fl.fl_end, 413 (long long)lock->fl.fl_end,
@@ -513,8 +513,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
513 __be32 ret; 513 __be32 ret;
514 514
515 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", 515 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
516 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 516 file_inode(file->f_file)->i_sb->s_id,
517 file->f_file->f_path.dentry->d_inode->i_ino, 517 file_inode(file->f_file)->i_ino,
518 lock->fl.fl_type, 518 lock->fl.fl_type,
519 (long long)lock->fl.fl_start, 519 (long long)lock->fl.fl_start,
520 (long long)lock->fl.fl_end); 520 (long long)lock->fl.fl_end);
@@ -606,8 +606,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
606 int error; 606 int error;
607 607
608 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", 608 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
609 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 609 file_inode(file->f_file)->i_sb->s_id,
610 file->f_file->f_path.dentry->d_inode->i_ino, 610 file_inode(file->f_file)->i_ino,
611 lock->fl.fl_pid, 611 lock->fl.fl_pid,
612 (long long)lock->fl.fl_start, 612 (long long)lock->fl.fl_start,
613 (long long)lock->fl.fl_end); 613 (long long)lock->fl.fl_end);
@@ -635,8 +635,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
635 int status = 0; 635 int status = 0;
636 636
637 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", 637 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
638 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 638 file_inode(file->f_file)->i_sb->s_id,
639 file->f_file->f_path.dentry->d_inode->i_ino, 639 file_inode(file->f_file)->i_ino,
640 lock->fl.fl_pid, 640 lock->fl.fl_pid,
641 (long long)lock->fl.fl_start, 641 (long long)lock->fl.fl_start,
642 (long long)lock->fl.fl_end); 642 (long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 8064435e8bef..97e87415b145 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
45 45
46static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) 46static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
47{ 47{
48 struct inode *inode = file->f_file->f_path.dentry->d_inode; 48 struct inode *inode = file_inode(file->f_file);
49 49
50 dprintk("lockd: %s %s/%ld\n", 50 dprintk("lockd: %s %s/%ld\n",
51 msg, inode->i_sb->s_id, inode->i_ino); 51 msg, inode->i_sb->s_id, inode->i_ino);
@@ -84,7 +84,6 @@ __be32
84nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, 84nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
85 struct nfs_fh *f) 85 struct nfs_fh *f)
86{ 86{
87 struct hlist_node *pos;
88 struct nlm_file *file; 87 struct nlm_file *file;
89 unsigned int hash; 88 unsigned int hash;
90 __be32 nfserr; 89 __be32 nfserr;
@@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
96 /* Lock file table */ 95 /* Lock file table */
97 mutex_lock(&nlm_file_mutex); 96 mutex_lock(&nlm_file_mutex);
98 97
99 hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) 98 hlist_for_each_entry(file, &nlm_files[hash], f_list)
100 if (!nfs_compare_fh(&file->f_handle, f)) 99 if (!nfs_compare_fh(&file->f_handle, f))
101 goto found; 100 goto found;
102 101
@@ -248,13 +247,13 @@ static int
248nlm_traverse_files(void *data, nlm_host_match_fn_t match, 247nlm_traverse_files(void *data, nlm_host_match_fn_t match,
249 int (*is_failover_file)(void *data, struct nlm_file *file)) 248 int (*is_failover_file)(void *data, struct nlm_file *file))
250{ 249{
251 struct hlist_node *pos, *next; 250 struct hlist_node *next;
252 struct nlm_file *file; 251 struct nlm_file *file;
253 int i, ret = 0; 252 int i, ret = 0;
254 253
255 mutex_lock(&nlm_file_mutex); 254 mutex_lock(&nlm_file_mutex);
256 for (i = 0; i < FILE_NRHASH; i++) { 255 for (i = 0; i < FILE_NRHASH; i++) {
257 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { 256 hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
258 if (is_failover_file && !is_failover_file(data, file)) 257 if (is_failover_file && !is_failover_file(data, file))
259 continue; 258 continue;
260 file->f_count++; 259 file->f_count++;
diff --git a/fs/locks.c b/fs/locks.c
index a94e331a52a2..cb424a4fed71 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -334,7 +334,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
334 start = filp->f_pos; 334 start = filp->f_pos;
335 break; 335 break;
336 case SEEK_END: 336 case SEEK_END:
337 start = i_size_read(filp->f_path.dentry->d_inode); 337 start = i_size_read(file_inode(filp));
338 break; 338 break;
339 default: 339 default:
340 return -EINVAL; 340 return -EINVAL;
@@ -384,7 +384,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
384 start = filp->f_pos; 384 start = filp->f_pos;
385 break; 385 break;
386 case SEEK_END: 386 case SEEK_END:
387 start = i_size_read(filp->f_path.dentry->d_inode); 387 start = i_size_read(file_inode(filp));
388 break; 388 break;
389 default: 389 default:
390 return -EINVAL; 390 return -EINVAL;
@@ -627,7 +627,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
627 struct file_lock *cfl; 627 struct file_lock *cfl;
628 628
629 lock_flocks(); 629 lock_flocks();
630 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { 630 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
631 if (!IS_POSIX(cfl)) 631 if (!IS_POSIX(cfl))
632 continue; 632 continue;
633 if (posix_locks_conflict(fl, cfl)) 633 if (posix_locks_conflict(fl, cfl))
@@ -708,7 +708,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
708{ 708{
709 struct file_lock *new_fl = NULL; 709 struct file_lock *new_fl = NULL;
710 struct file_lock **before; 710 struct file_lock **before;
711 struct inode * inode = filp->f_path.dentry->d_inode; 711 struct inode * inode = file_inode(filp);
712 int error = 0; 712 int error = 0;
713 int found = 0; 713 int found = 0;
714 714
@@ -1002,7 +1002,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1002int posix_lock_file(struct file *filp, struct file_lock *fl, 1002int posix_lock_file(struct file *filp, struct file_lock *fl,
1003 struct file_lock *conflock) 1003 struct file_lock *conflock)
1004{ 1004{
1005 return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); 1005 return __posix_lock_file(file_inode(filp), fl, conflock);
1006} 1006}
1007EXPORT_SYMBOL(posix_lock_file); 1007EXPORT_SYMBOL(posix_lock_file);
1008 1008
@@ -1326,8 +1326,8 @@ int fcntl_getlease(struct file *filp)
1326 int type = F_UNLCK; 1326 int type = F_UNLCK;
1327 1327
1328 lock_flocks(); 1328 lock_flocks();
1329 time_out_leases(filp->f_path.dentry->d_inode); 1329 time_out_leases(file_inode(filp));
1330 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); 1330 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
1331 fl = fl->fl_next) { 1331 fl = fl->fl_next) {
1332 if (fl->fl_file == filp) { 1332 if (fl->fl_file == filp) {
1333 type = target_leasetype(fl); 1333 type = target_leasetype(fl);
@@ -1843,7 +1843,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1843 if (copy_from_user(&flock, l, sizeof(flock))) 1843 if (copy_from_user(&flock, l, sizeof(flock)))
1844 goto out; 1844 goto out;
1845 1845
1846 inode = filp->f_path.dentry->d_inode; 1846 inode = file_inode(filp);
1847 1847
1848 /* Don't allow mandatory locks on files that may be memory mapped 1848 /* Don't allow mandatory locks on files that may be memory mapped
1849 * and shared. 1849 * and shared.
@@ -1961,7 +1961,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1961 if (copy_from_user(&flock, l, sizeof(flock))) 1961 if (copy_from_user(&flock, l, sizeof(flock)))
1962 goto out; 1962 goto out;
1963 1963
1964 inode = filp->f_path.dentry->d_inode; 1964 inode = file_inode(filp);
1965 1965
1966 /* Don't allow mandatory locks on files that may be memory mapped 1966 /* Don't allow mandatory locks on files that may be memory mapped
1967 * and shared. 1967 * and shared.
@@ -2030,7 +2030,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
2030 * posix_lock_file(). Another process could be setting a lock on this 2030 * posix_lock_file(). Another process could be setting a lock on this
2031 * file at the same time, but we wouldn't remove that lock anyway. 2031 * file at the same time, but we wouldn't remove that lock anyway.
2032 */ 2032 */
2033 if (!filp->f_path.dentry->d_inode->i_flock) 2033 if (!file_inode(filp)->i_flock)
2034 return; 2034 return;
2035 2035
2036 lock.fl_type = F_UNLCK; 2036 lock.fl_type = F_UNLCK;
@@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(locks_remove_posix);
2056 */ 2056 */
2057void locks_remove_flock(struct file *filp) 2057void locks_remove_flock(struct file *filp)
2058{ 2058{
2059 struct inode * inode = filp->f_path.dentry->d_inode; 2059 struct inode * inode = file_inode(filp);
2060 struct file_lock *fl; 2060 struct file_lock *fl;
2061 struct file_lock **before; 2061 struct file_lock **before;
2062 2062
@@ -2152,7 +2152,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2152 fl_pid = fl->fl_pid; 2152 fl_pid = fl->fl_pid;
2153 2153
2154 if (fl->fl_file != NULL) 2154 if (fl->fl_file != NULL)
2155 inode = fl->fl_file->f_path.dentry->d_inode; 2155 inode = file_inode(fl->fl_file);
2156 2156
2157 seq_printf(f, "%lld:%s ", id, pfx); 2157 seq_printf(f, "%lld:%s ", id, pfx);
2158 if (IS_POSIX(fl)) { 2158 if (IS_POSIX(fl)) {
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index daf9a9b32dd3..09ed066c0221 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
1config LOGFS 1config LOGFS
2 tristate "LogFS file system (EXPERIMENTAL)" 2 tristate "LogFS file system"
3 depends on (MTD || BLOCK) && EXPERIMENTAL 3 depends on (MTD || BLOCK)
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
6 select CRC32 6 select CRC32
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 26e4a941532f..b82751082112 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -284,7 +284,7 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
284#define IMPLICIT_NODES 2 284#define IMPLICIT_NODES 2
285static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) 285static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
286{ 286{
287 struct inode *dir = file->f_dentry->d_inode; 287 struct inode *dir = file_inode(file);
288 loff_t pos = file->f_pos - IMPLICIT_NODES; 288 loff_t pos = file->f_pos - IMPLICIT_NODES;
289 struct page *page; 289 struct page *page;
290 struct logfs_disk_dentry *dd; 290 struct logfs_disk_dentry *dd;
@@ -320,7 +320,7 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
320 320
321static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) 321static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
322{ 322{
323 struct inode *inode = file->f_dentry->d_inode; 323 struct inode *inode = file_inode(file);
324 ino_t pino = parent_ino(file->f_dentry); 324 ino_t pino = parent_ino(file->f_dentry);
325 int err; 325 int err;
326 326
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 3886cded283c..c2219a6dd3c8 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -183,7 +183,7 @@ static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this)
183 183
184long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 184long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
185{ 185{
186 struct inode *inode = file->f_path.dentry->d_inode; 186 struct inode *inode = file_inode(file);
187 struct logfs_inode *li = logfs_inode(inode); 187 struct logfs_inode *li = logfs_inode(inode);
188 unsigned int oldflags, flags; 188 unsigned int oldflags, flags;
189 int err; 189 int err;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 685b2d981b87..a9ed6f36e6ea 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -85,7 +85,7 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
85static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) 85static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
86{ 86{
87 unsigned long pos = filp->f_pos; 87 unsigned long pos = filp->f_pos;
88 struct inode *inode = filp->f_path.dentry->d_inode; 88 struct inode *inode = file_inode(filp);
89 struct super_block *sb = inode->i_sb; 89 struct super_block *sb = inode->i_sb;
90 unsigned offset = pos & ~PAGE_CACHE_MASK; 90 unsigned offset = pos & ~PAGE_CACHE_MASK;
91 unsigned long n = pos >> PAGE_CACHE_SHIFT; 91 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/namei.c b/fs/namei.c
index 43a97ee1d4c8..dc984fee5532 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -600,14 +600,10 @@ static int complete_walk(struct nameidata *nd)
600 if (likely(!(nd->flags & LOOKUP_JUMPED))) 600 if (likely(!(nd->flags & LOOKUP_JUMPED)))
601 return 0; 601 return 0;
602 602
603 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 603 if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
604 return 0; 604 return 0;
605 605
606 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 606 status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
607 return 0;
608
609 /* Note: we do not d_invalidate() */
610 status = d_revalidate(dentry, nd->flags);
611 if (status > 0) 607 if (status > 0)
612 return 0; 608 return 0;
613 609
@@ -1342,7 +1338,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1342 * small and for now I'd prefer to have fast path as straight as possible. 1338 * small and for now I'd prefer to have fast path as straight as possible.
1343 * It _is_ time-critical. 1339 * It _is_ time-critical.
1344 */ 1340 */
1345static int lookup_fast(struct nameidata *nd, struct qstr *name, 1341static int lookup_fast(struct nameidata *nd,
1346 struct path *path, struct inode **inode) 1342 struct path *path, struct inode **inode)
1347{ 1343{
1348 struct vfsmount *mnt = nd->path.mnt; 1344 struct vfsmount *mnt = nd->path.mnt;
@@ -1358,7 +1354,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
1358 */ 1354 */
1359 if (nd->flags & LOOKUP_RCU) { 1355 if (nd->flags & LOOKUP_RCU) {
1360 unsigned seq; 1356 unsigned seq;
1361 dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); 1357 dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode);
1362 if (!dentry) 1358 if (!dentry)
1363 goto unlazy; 1359 goto unlazy;
1364 1360
@@ -1400,7 +1396,7 @@ unlazy:
1400 if (unlazy_walk(nd, dentry)) 1396 if (unlazy_walk(nd, dentry))
1401 return -ECHILD; 1397 return -ECHILD;
1402 } else { 1398 } else {
1403 dentry = __d_lookup(parent, name); 1399 dentry = __d_lookup(parent, &nd->last);
1404 } 1400 }
1405 1401
1406 if (unlikely(!dentry)) 1402 if (unlikely(!dentry))
@@ -1436,8 +1432,7 @@ need_lookup:
1436} 1432}
1437 1433
1438/* Fast lookup failed, do it the slow way */ 1434/* Fast lookup failed, do it the slow way */
1439static int lookup_slow(struct nameidata *nd, struct qstr *name, 1435static int lookup_slow(struct nameidata *nd, struct path *path)
1440 struct path *path)
1441{ 1436{
1442 struct dentry *dentry, *parent; 1437 struct dentry *dentry, *parent;
1443 int err; 1438 int err;
@@ -1446,7 +1441,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name,
1446 BUG_ON(nd->inode != parent->d_inode); 1441 BUG_ON(nd->inode != parent->d_inode);
1447 1442
1448 mutex_lock(&parent->d_inode->i_mutex); 1443 mutex_lock(&parent->d_inode->i_mutex);
1449 dentry = __lookup_hash(name, parent, nd->flags); 1444 dentry = __lookup_hash(&nd->last, parent, nd->flags);
1450 mutex_unlock(&parent->d_inode->i_mutex); 1445 mutex_unlock(&parent->d_inode->i_mutex);
1451 if (IS_ERR(dentry)) 1446 if (IS_ERR(dentry))
1452 return PTR_ERR(dentry); 1447 return PTR_ERR(dentry);
@@ -1519,7 +1514,7 @@ static inline int should_follow_link(struct inode *inode, int follow)
1519} 1514}
1520 1515
1521static inline int walk_component(struct nameidata *nd, struct path *path, 1516static inline int walk_component(struct nameidata *nd, struct path *path,
1522 struct qstr *name, int type, int follow) 1517 int follow)
1523{ 1518{
1524 struct inode *inode; 1519 struct inode *inode;
1525 int err; 1520 int err;
@@ -1528,14 +1523,14 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1528 * to be able to know about the current root directory and 1523 * to be able to know about the current root directory and
1529 * parent relationships. 1524 * parent relationships.
1530 */ 1525 */
1531 if (unlikely(type != LAST_NORM)) 1526 if (unlikely(nd->last_type != LAST_NORM))
1532 return handle_dots(nd, type); 1527 return handle_dots(nd, nd->last_type);
1533 err = lookup_fast(nd, name, path, &inode); 1528 err = lookup_fast(nd, path, &inode);
1534 if (unlikely(err)) { 1529 if (unlikely(err)) {
1535 if (err < 0) 1530 if (err < 0)
1536 goto out_err; 1531 goto out_err;
1537 1532
1538 err = lookup_slow(nd, name, path); 1533 err = lookup_slow(nd, path);
1539 if (err < 0) 1534 if (err < 0)
1540 goto out_err; 1535 goto out_err;
1541 1536
@@ -1594,8 +1589,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1594 res = follow_link(&link, nd, &cookie); 1589 res = follow_link(&link, nd, &cookie);
1595 if (res) 1590 if (res)
1596 break; 1591 break;
1597 res = walk_component(nd, path, &nd->last, 1592 res = walk_component(nd, path, LOOKUP_FOLLOW);
1598 nd->last_type, LOOKUP_FOLLOW);
1599 put_link(nd, &link, cookie); 1593 put_link(nd, &link, cookie);
1600 } while (res > 0); 1594 } while (res > 0);
1601 1595
@@ -1802,8 +1796,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1802 } 1796 }
1803 } 1797 }
1804 1798
1799 nd->last = this;
1800 nd->last_type = type;
1801
1805 if (!name[len]) 1802 if (!name[len])
1806 goto last_component; 1803 return 0;
1807 /* 1804 /*
1808 * If it wasn't NUL, we know it was '/'. Skip that 1805 * If it wasn't NUL, we know it was '/'. Skip that
1809 * slash, and continue until no more slashes. 1806 * slash, and continue until no more slashes.
@@ -1812,10 +1809,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1812 len++; 1809 len++;
1813 } while (unlikely(name[len] == '/')); 1810 } while (unlikely(name[len] == '/'));
1814 if (!name[len]) 1811 if (!name[len])
1815 goto last_component; 1812 return 0;
1813
1816 name += len; 1814 name += len;
1817 1815
1818 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1816 err = walk_component(nd, &next, LOOKUP_FOLLOW);
1819 if (err < 0) 1817 if (err < 0)
1820 return err; 1818 return err;
1821 1819
@@ -1824,16 +1822,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1824 if (err) 1822 if (err)
1825 return err; 1823 return err;
1826 } 1824 }
1827 if (can_lookup(nd->inode)) 1825 if (!can_lookup(nd->inode)) {
1828 continue; 1826 err = -ENOTDIR;
1829 err = -ENOTDIR; 1827 break;
1830 break; 1828 }
1831 /* here ends the main loop */
1832
1833last_component:
1834 nd->last = this;
1835 nd->last_type = type;
1836 return 0;
1837 } 1829 }
1838 terminate_walk(nd); 1830 terminate_walk(nd);
1839 return err; 1831 return err;
@@ -1932,8 +1924,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1932 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 1924 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1933 1925
1934 nd->flags &= ~LOOKUP_PARENT; 1926 nd->flags &= ~LOOKUP_PARENT;
1935 return walk_component(nd, path, &nd->last, nd->last_type, 1927 return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW);
1936 nd->flags & LOOKUP_FOLLOW);
1937} 1928}
1938 1929
 1939/* Returns 0 and nd will be valid on success; Returns error, otherwise. */ 1930/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
@@ -2732,7 +2723,7 @@ static int do_last(struct nameidata *nd, struct path *path,
2732 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) 2723 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2733 symlink_ok = true; 2724 symlink_ok = true;
2734 /* we _can_ be in RCU mode here */ 2725 /* we _can_ be in RCU mode here */
2735 error = lookup_fast(nd, &nd->last, path, &inode); 2726 error = lookup_fast(nd, path, &inode);
2736 if (likely(!error)) 2727 if (likely(!error))
2737 goto finish_lookup; 2728 goto finish_lookup;
2738 2729
@@ -2778,7 +2769,7 @@ retry_lookup:
2778 goto out; 2769 goto out;
2779 2770
2780 if ((*opened & FILE_CREATED) || 2771 if ((*opened & FILE_CREATED) ||
2781 !S_ISREG(file->f_path.dentry->d_inode->i_mode)) 2772 !S_ISREG(file_inode(file)->i_mode))
2782 will_truncate = false; 2773 will_truncate = false;
2783 2774
2784 audit_inode(name, file->f_path.dentry, 0); 2775 audit_inode(name, file->f_path.dentry, 0);
@@ -2941,8 +2932,8 @@ static struct file *path_openat(int dfd, struct filename *pathname,
2941 int error; 2932 int error;
2942 2933
2943 file = get_empty_filp(); 2934 file = get_empty_filp();
2944 if (!file) 2935 if (IS_ERR(file))
2945 return ERR_PTR(-ENFILE); 2936 return file;
2946 2937
2947 file->f_flags = op->open_flag; 2938 file->f_flags = op->open_flag;
2948 2939
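
A note on the fs/namei.c hunks above: they all serve one refactoring, moving the last path component out of the lookup helpers' argument lists and into struct nameidata (nd->last, nd->last_type), which link_path_walk() now fills in before each step. A condensed sketch of the calling-convention change, assembled from the hunks above (illustrative only, not an additional hunk):

    /* before: the component travelled as explicit arguments */
    err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);

    /* after: link_path_walk() records it in the nameidata ... */
    nd->last = this;
    nd->last_type = type;
    /* ... and walk_component()/lookup_fast()/lookup_slow() read
     * nd->last and nd->last_type themselves */
    err = walk_component(nd, &next, LOOKUP_FOLLOW);

The same change lets lookup_last() and do_last() drop their &nd->last / nd->last_type arguments, since the data is already where those callers need it.
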
diff --git a/fs/namespace.c b/fs/namespace.c
index 55605c552787..50ca17d3cb45 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -384,7 +384,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
384 */ 384 */
385int __mnt_want_write_file(struct file *file) 385int __mnt_want_write_file(struct file *file)
386{ 386{
387 struct inode *inode = file->f_dentry->d_inode; 387 struct inode *inode = file_inode(file);
388 388
389 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) 389 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
390 return __mnt_want_write(file->f_path.mnt); 390 return __mnt_want_write(file->f_path.mnt);
@@ -1237,6 +1237,14 @@ static int do_umount(struct mount *mnt, int flags)
1237 return retval; 1237 return retval;
1238} 1238}
1239 1239
1240/*
1241 * Is the caller allowed to modify his namespace?
1242 */
1243static inline bool may_mount(void)
1244{
1245 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1246}
1247
1240/* 1248/*
1241 * Now umount can handle mount points as well as block devices. 1249 * Now umount can handle mount points as well as block devices.
1242 * This is important for filesystems which use unnamed block devices. 1250 * This is important for filesystems which use unnamed block devices.
@@ -1255,6 +1263,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1255 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) 1263 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1256 return -EINVAL; 1264 return -EINVAL;
1257 1265
1266 if (!may_mount())
1267 return -EPERM;
1268
1258 if (!(flags & UMOUNT_NOFOLLOW)) 1269 if (!(flags & UMOUNT_NOFOLLOW))
1259 lookup_flags |= LOOKUP_FOLLOW; 1270 lookup_flags |= LOOKUP_FOLLOW;
1260 1271
@@ -1268,10 +1279,6 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1268 if (!check_mnt(mnt)) 1279 if (!check_mnt(mnt))
1269 goto dput_and_out; 1280 goto dput_and_out;
1270 1281
1271 retval = -EPERM;
1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1273 goto dput_and_out;
1274
1275 retval = do_umount(mnt, flags); 1282 retval = do_umount(mnt, flags);
1276dput_and_out: 1283dput_and_out:
1277 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1284 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
@@ -1293,24 +1300,6 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1293 1300
1294#endif 1301#endif
1295 1302
1296static int mount_is_safe(struct path *path)
1297{
1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1299 return 0;
1300 return -EPERM;
1301#ifdef notyet
1302 if (S_ISLNK(path->dentry->d_inode->i_mode))
1303 return -EPERM;
1304 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1305 if (current_uid() != path->dentry->d_inode->i_uid)
1306 return -EPERM;
1307 }
1308 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1309 return -EPERM;
1310 return 0;
1311#endif
1312}
1313
1314static bool mnt_ns_loop(struct path *path) 1303static bool mnt_ns_loop(struct path *path)
1315{ 1304{
1316 /* Could bind mounting the mount namespace inode cause a 1305 /* Could bind mounting the mount namespace inode cause a
@@ -1633,9 +1622,6 @@ static int do_change_type(struct path *path, int flag)
1633 int type; 1622 int type;
1634 int err = 0; 1623 int err = 0;
1635 1624
1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1637 return -EPERM;
1638
1639 if (path->dentry != path->mnt->mnt_root) 1625 if (path->dentry != path->mnt->mnt_root)
1640 return -EINVAL; 1626 return -EINVAL;
1641 1627
@@ -1669,9 +1655,7 @@ static int do_loopback(struct path *path, const char *old_name,
1669 LIST_HEAD(umount_list); 1655 LIST_HEAD(umount_list);
1670 struct path old_path; 1656 struct path old_path;
1671 struct mount *mnt = NULL, *old; 1657 struct mount *mnt = NULL, *old;
1672 int err = mount_is_safe(path); 1658 int err;
1673 if (err)
1674 return err;
1675 if (!old_name || !*old_name) 1659 if (!old_name || !*old_name)
1676 return -EINVAL; 1660 return -EINVAL;
1677 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); 1661 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
@@ -1748,9 +1732,6 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1748 struct super_block *sb = path->mnt->mnt_sb; 1732 struct super_block *sb = path->mnt->mnt_sb;
1749 struct mount *mnt = real_mount(path->mnt); 1733 struct mount *mnt = real_mount(path->mnt);
1750 1734
1751 if (!capable(CAP_SYS_ADMIN))
1752 return -EPERM;
1753
1754 if (!check_mnt(mnt)) 1735 if (!check_mnt(mnt))
1755 return -EINVAL; 1736 return -EINVAL;
1756 1737
@@ -1764,6 +1745,8 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1764 down_write(&sb->s_umount); 1745 down_write(&sb->s_umount);
1765 if (flags & MS_BIND) 1746 if (flags & MS_BIND)
1766 err = change_mount_flags(path->mnt, flags); 1747 err = change_mount_flags(path->mnt, flags);
1748 else if (!capable(CAP_SYS_ADMIN))
1749 err = -EPERM;
1767 else 1750 else
1768 err = do_remount_sb(sb, flags, data, 0); 1751 err = do_remount_sb(sb, flags, data, 0);
1769 if (!err) { 1752 if (!err) {
@@ -1796,9 +1779,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1796 struct path old_path, parent_path; 1779 struct path old_path, parent_path;
1797 struct mount *p; 1780 struct mount *p;
1798 struct mount *old; 1781 struct mount *old;
1799 int err = 0; 1782 int err;
1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1801 return -EPERM;
1802 if (!old_name || !*old_name) 1783 if (!old_name || !*old_name)
1803 return -EINVAL; 1784 return -EINVAL;
1804 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); 1785 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
@@ -1933,18 +1914,13 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
1933 int mnt_flags, const char *name, void *data) 1914 int mnt_flags, const char *name, void *data)
1934{ 1915{
1935 struct file_system_type *type; 1916 struct file_system_type *type;
1936 struct user_namespace *user_ns; 1917 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
1937 struct vfsmount *mnt; 1918 struct vfsmount *mnt;
1938 int err; 1919 int err;
1939 1920
1940 if (!fstype) 1921 if (!fstype)
1941 return -EINVAL; 1922 return -EINVAL;
1942 1923
1943 /* we need capabilities... */
1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1946 return -EPERM;
1947
1948 type = get_fs_type(fstype); 1924 type = get_fs_type(fstype);
1949 if (!type) 1925 if (!type)
1950 return -ENODEV; 1926 return -ENODEV;
@@ -2258,6 +2234,9 @@ long do_mount(const char *dev_name, const char *dir_name,
2258 if (retval) 2234 if (retval)
2259 goto dput_out; 2235 goto dput_out;
2260 2236
2237 if (!may_mount())
2238 return -EPERM;
2239
 2261 /* Default to relatime unless overridden */ 2240 /* Default to relatime unless overridden */
2262 if (!(flags & MS_NOATIME)) 2241 if (!(flags & MS_NOATIME))
2263 mnt_flags |= MNT_RELATIME; 2242 mnt_flags |= MNT_RELATIME;
@@ -2567,7 +2546,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2567 struct mount *new_mnt, *root_mnt; 2546 struct mount *new_mnt, *root_mnt;
2568 int error; 2547 int error;
2569 2548
2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) 2549 if (!may_mount())
2571 return -EPERM; 2550 return -EPERM;
2572 2551
2573 error = user_path_dir(new_root, &new); 2552 error = user_path_dir(new_root, &new);
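
The fs/namespace.c hunks converge on a single pattern: the scattered ns_capable()/capable() tests inside do_change_type(), do_loopback(), do_move_mount(), do_new_mount() and do_remount() are replaced by one up-front check at the syscall boundaries. A minimal sketch of that pattern, taken from the hunks above (illustrative only):

    /* one question, asked against the caller's mount namespace */
    static inline bool may_mount(void)
    {
            return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
    }

    /* umount(2), do_mount() and pivot_root(2) now begin with: */
    if (!may_mount())
            return -EPERM;

The one remaining in-place check is in do_remount(), where a plain capable(CAP_SYS_ADMIN) still guards the non-MS_BIND branch that reaches do_remount_sb().
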
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 4117e7b377bb..816326093656 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -593,14 +593,10 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
593 return 1; /* I'm not sure */ 593 return 1; /* I'm not sure */
594 594
595 qname.name = __name; 595 qname.name = __name;
596 qname.hash = full_name_hash(qname.name, qname.len);
597
598 if (dentry->d_op && dentry->d_op->d_hash)
599 if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
600 goto end_advance;
601
602 newdent = d_lookup(dentry, &qname);
603 596
597 newdent = d_hash_and_lookup(dentry, &qname);
598 if (unlikely(IS_ERR(newdent)))
599 goto end_advance;
604 if (!newdent) { 600 if (!newdent) {
605 newdent = d_alloc(dentry, &qname); 601 newdent = d_alloc(dentry, &qname);
606 if (!newdent) 602 if (!newdent)
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1acdad7fcec7..7dafd6899a62 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -331,12 +331,15 @@ static int ncp_show_options(struct seq_file *seq, struct dentry *root)
331 struct ncp_server *server = NCP_SBP(root->d_sb); 331 struct ncp_server *server = NCP_SBP(root->d_sb);
332 unsigned int tmp; 332 unsigned int tmp;
333 333
334 if (server->m.uid != 0) 334 if (!uid_eq(server->m.uid, GLOBAL_ROOT_UID))
335 seq_printf(seq, ",uid=%u", server->m.uid); 335 seq_printf(seq, ",uid=%u",
336 if (server->m.gid != 0) 336 from_kuid_munged(&init_user_ns, server->m.uid));
337 seq_printf(seq, ",gid=%u", server->m.gid); 337 if (!gid_eq(server->m.gid, GLOBAL_ROOT_GID))
338 if (server->m.mounted_uid != 0) 338 seq_printf(seq, ",gid=%u",
339 seq_printf(seq, ",owner=%u", server->m.mounted_uid); 339 from_kgid_munged(&init_user_ns, server->m.gid));
340 if (!uid_eq(server->m.mounted_uid, GLOBAL_ROOT_UID))
341 seq_printf(seq, ",owner=%u",
342 from_kuid_munged(&init_user_ns, server->m.mounted_uid));
340 tmp = server->m.file_mode & S_IALLUGO; 343 tmp = server->m.file_mode & S_IALLUGO;
341 if (tmp != NCP_DEFAULT_FILE_MODE) 344 if (tmp != NCP_DEFAULT_FILE_MODE)
342 seq_printf(seq, ",mode=0%o", tmp); 345 seq_printf(seq, ",mode=0%o", tmp);
@@ -381,13 +384,13 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
381 384
382 data->flags = 0; 385 data->flags = 0;
383 data->int_flags = 0; 386 data->int_flags = 0;
384 data->mounted_uid = 0; 387 data->mounted_uid = GLOBAL_ROOT_UID;
385 data->wdog_pid = NULL; 388 data->wdog_pid = NULL;
386 data->ncp_fd = ~0; 389 data->ncp_fd = ~0;
387 data->time_out = NCP_DEFAULT_TIME_OUT; 390 data->time_out = NCP_DEFAULT_TIME_OUT;
388 data->retry_count = NCP_DEFAULT_RETRY_COUNT; 391 data->retry_count = NCP_DEFAULT_RETRY_COUNT;
389 data->uid = 0; 392 data->uid = GLOBAL_ROOT_UID;
390 data->gid = 0; 393 data->gid = GLOBAL_ROOT_GID;
391 data->file_mode = NCP_DEFAULT_FILE_MODE; 394 data->file_mode = NCP_DEFAULT_FILE_MODE;
392 data->dir_mode = NCP_DEFAULT_DIR_MODE; 395 data->dir_mode = NCP_DEFAULT_DIR_MODE;
393 data->info_fd = -1; 396 data->info_fd = -1;
@@ -399,13 +402,19 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
399 goto err; 402 goto err;
400 switch (optval) { 403 switch (optval) {
401 case 'u': 404 case 'u':
402 data->uid = optint; 405 data->uid = make_kuid(current_user_ns(), optint);
406 if (!uid_valid(data->uid))
407 goto err;
403 break; 408 break;
404 case 'g': 409 case 'g':
405 data->gid = optint; 410 data->gid = make_kgid(current_user_ns(), optint);
411 if (!gid_valid(data->gid))
412 goto err;
406 break; 413 break;
407 case 'o': 414 case 'o':
408 data->mounted_uid = optint; 415 data->mounted_uid = make_kuid(current_user_ns(), optint);
416 if (!uid_valid(data->mounted_uid))
417 goto err;
409 break; 418 break;
410 case 'm': 419 case 'm':
411 data->file_mode = optint; 420 data->file_mode = optint;
@@ -480,13 +489,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
480 489
481 data.flags = md->flags; 490 data.flags = md->flags;
482 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE; 491 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE;
483 data.mounted_uid = md->mounted_uid; 492 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
484 data.wdog_pid = find_get_pid(md->wdog_pid); 493 data.wdog_pid = find_get_pid(md->wdog_pid);
485 data.ncp_fd = md->ncp_fd; 494 data.ncp_fd = md->ncp_fd;
486 data.time_out = md->time_out; 495 data.time_out = md->time_out;
487 data.retry_count = md->retry_count; 496 data.retry_count = md->retry_count;
488 data.uid = md->uid; 497 data.uid = make_kuid(current_user_ns(), md->uid);
489 data.gid = md->gid; 498 data.gid = make_kgid(current_user_ns(), md->gid);
490 data.file_mode = md->file_mode; 499 data.file_mode = md->file_mode;
491 data.dir_mode = md->dir_mode; 500 data.dir_mode = md->dir_mode;
492 data.info_fd = -1; 501 data.info_fd = -1;
@@ -499,13 +508,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
499 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; 508 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
500 509
501 data.flags = md->flags; 510 data.flags = md->flags;
502 data.mounted_uid = md->mounted_uid; 511 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
503 data.wdog_pid = find_get_pid(md->wdog_pid); 512 data.wdog_pid = find_get_pid(md->wdog_pid);
504 data.ncp_fd = md->ncp_fd; 513 data.ncp_fd = md->ncp_fd;
505 data.time_out = md->time_out; 514 data.time_out = md->time_out;
506 data.retry_count = md->retry_count; 515 data.retry_count = md->retry_count;
507 data.uid = md->uid; 516 data.uid = make_kuid(current_user_ns(), md->uid);
508 data.gid = md->gid; 517 data.gid = make_kgid(current_user_ns(), md->gid);
509 data.file_mode = md->file_mode; 518 data.file_mode = md->file_mode;
510 data.dir_mode = md->dir_mode; 519 data.dir_mode = md->dir_mode;
511 data.info_fd = -1; 520 data.info_fd = -1;
@@ -520,12 +529,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
520 goto out; 529 goto out;
521 break; 530 break;
522 } 531 }
532 error = -EINVAL;
533 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
534 !gid_valid(data.gid))
535 goto out;
523 error = -EBADF; 536 error = -EBADF;
524 ncp_filp = fget(data.ncp_fd); 537 ncp_filp = fget(data.ncp_fd);
525 if (!ncp_filp) 538 if (!ncp_filp)
526 goto out; 539 goto out;
527 error = -ENOTSOCK; 540 error = -ENOTSOCK;
528 sock_inode = ncp_filp->f_path.dentry->d_inode; 541 sock_inode = file_inode(ncp_filp);
529 if (!S_ISSOCK(sock_inode->i_mode)) 542 if (!S_ISSOCK(sock_inode->i_mode))
530 goto out_fput; 543 goto out_fput;
531 sock = SOCKET_I(sock_inode); 544 sock = SOCKET_I(sock_inode);
@@ -564,7 +577,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
564 if (!server->info_filp) 577 if (!server->info_filp)
565 goto out_bdi; 578 goto out_bdi;
566 error = -ENOTSOCK; 579 error = -ENOTSOCK;
567 sock_inode = server->info_filp->f_path.dentry->d_inode; 580 sock_inode = file_inode(server->info_filp);
568 if (!S_ISSOCK(sock_inode->i_mode)) 581 if (!S_ISSOCK(sock_inode->i_mode))
569 goto out_fput2; 582 goto out_fput2;
570 info_sock = SOCKET_I(sock_inode); 583 info_sock = SOCKET_I(sock_inode);
@@ -886,12 +899,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
886 goto out; 899 goto out;
887 900
888 result = -EPERM; 901 result = -EPERM;
889 if (((attr->ia_valid & ATTR_UID) && 902 if ((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, server->m.uid))
890 (attr->ia_uid != server->m.uid)))
891 goto out; 903 goto out;
892 904
893 if (((attr->ia_valid & ATTR_GID) && 905 if ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, server->m.gid))
894 (attr->ia_gid != server->m.gid)))
895 goto out; 906 goto out;
896 907
897 if (((attr->ia_valid & ATTR_MODE) && 908 if (((attr->ia_valid & ATTR_MODE) &&
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6958adfaff08..60426ccb3b65 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -45,7 +45,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
45 return -EINVAL; 45 return -EINVAL;
46 } 46 }
47 /* TODO: info.addr = server->m.serv_addr; */ 47 /* TODO: info.addr = server->m.serv_addr; */
48 SET_UID(info.mounted_uid, server->m.mounted_uid); 48 SET_UID(info.mounted_uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
49 info.connection = server->connection; 49 info.connection = server->connection;
50 info.buffer_size = server->buffer_size; 50 info.buffer_size = server->buffer_size;
51 info.volume_number = NCP_FINFO(inode)->volNumber; 51 info.volume_number = NCP_FINFO(inode)->volNumber;
@@ -69,7 +69,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
69 DPRINTK("info.version invalid: %d\n", info2.version); 69 DPRINTK("info.version invalid: %d\n", info2.version);
70 return -EINVAL; 70 return -EINVAL;
71 } 71 }
72 info2.mounted_uid = server->m.mounted_uid; 72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
73 info2.connection = server->connection; 73 info2.connection = server->connection;
74 info2.buffer_size = server->buffer_size; 74 info2.buffer_size = server->buffer_size;
75 info2.volume_number = NCP_FINFO(inode)->volNumber; 75 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -135,7 +135,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
135 DPRINTK("info.version invalid: %d\n", info2.version); 135 DPRINTK("info.version invalid: %d\n", info2.version);
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 info2.mounted_uid = server->m.mounted_uid; 138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
139 info2.connection = server->connection; 139 info2.connection = server->connection;
140 info2.buffer_size = server->buffer_size; 140 info2.buffer_size = server->buffer_size;
141 info2.volume_number = NCP_FINFO(inode)->volNumber; 141 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -348,22 +348,25 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
348 { 348 {
349 u16 uid; 349 u16 uid;
350 350
351 SET_UID(uid, server->m.mounted_uid); 351 SET_UID(uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
352 if (put_user(uid, (u16 __user *)argp)) 352 if (put_user(uid, (u16 __user *)argp))
353 return -EFAULT; 353 return -EFAULT;
354 return 0; 354 return 0;
355 } 355 }
356 case NCP_IOC_GETMOUNTUID32: 356 case NCP_IOC_GETMOUNTUID32:
357 if (put_user(server->m.mounted_uid, 357 {
358 (u32 __user *)argp)) 358 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
359 if (put_user(uid, (u32 __user *)argp))
359 return -EFAULT; 360 return -EFAULT;
360 return 0; 361 return 0;
362 }
361 case NCP_IOC_GETMOUNTUID64: 363 case NCP_IOC_GETMOUNTUID64:
362 if (put_user(server->m.mounted_uid, 364 {
363 (u64 __user *)argp)) 365 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
366 if (put_user(uid, (u64 __user *)argp))
364 return -EFAULT; 367 return -EFAULT;
365 return 0; 368 return 0;
366 369 }
367 case NCP_IOC_GETROOT: 370 case NCP_IOC_GETROOT:
368 { 371 {
369 struct ncp_setroot_ioctl sr; 372 struct ncp_setroot_ioctl sr;
@@ -808,9 +811,9 @@ outrel:
808 811
809long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 812long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
810{ 813{
811 struct inode *inode = filp->f_dentry->d_inode; 814 struct inode *inode = file_inode(filp);
812 struct ncp_server *server = NCP_SERVER(inode); 815 struct ncp_server *server = NCP_SERVER(inode);
813 uid_t uid = current_uid(); 816 kuid_t uid = current_uid();
814 int need_drop_write = 0; 817 int need_drop_write = 0;
815 long ret; 818 long ret;
816 819
@@ -819,12 +822,12 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
819 case NCP_IOC_CONN_LOGGED_IN: 822 case NCP_IOC_CONN_LOGGED_IN:
820 case NCP_IOC_SETROOT: 823 case NCP_IOC_SETROOT:
821 if (!capable(CAP_SYS_ADMIN)) { 824 if (!capable(CAP_SYS_ADMIN)) {
822 ret = -EACCES; 825 ret = -EPERM;
823 goto out; 826 goto out;
824 } 827 }
825 break; 828 break;
826 } 829 }
827 if (server->m.mounted_uid != uid) { 830 if (!uid_eq(server->m.mounted_uid, uid)) {
828 switch (cmd) { 831 switch (cmd) {
829 /* 832 /*
830 * Only mount owner can issue these ioctls. Information 833 * Only mount owner can issue these ioctls. Information
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 63d14a99483d..ee24df5af1f9 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -105,7 +105,7 @@ static const struct vm_operations_struct ncp_file_mmap =
105/* This is used for a general mmap of a ncp file */ 105/* This is used for a general mmap of a ncp file */
106int ncp_mmap(struct file *file, struct vm_area_struct *vma) 106int ncp_mmap(struct file *file, struct vm_area_struct *vma)
107{ 107{
108 struct inode *inode = file->f_path.dentry->d_inode; 108 struct inode *inode = file_inode(file);
109 109
110 DPRINTK("ncp_mmap: called\n"); 110 DPRINTK("ncp_mmap: called\n");
111 111
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index 54cc0cdb3dcb..c51b2c543539 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -23,15 +23,15 @@ struct ncp_mount_data_kernel {
23 unsigned long flags; /* NCP_MOUNT_* flags */ 23 unsigned long flags; /* NCP_MOUNT_* flags */
24 unsigned int int_flags; /* internal flags */ 24 unsigned int int_flags; /* internal flags */
25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001
26 uid_t mounted_uid; /* Who may umount() this filesystem? */ 26 kuid_t mounted_uid; /* Who may umount() this filesystem? */
27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */ 27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */
28 unsigned int ncp_fd; /* The socket to the ncp port */ 28 unsigned int ncp_fd; /* The socket to the ncp port */
29 unsigned int time_out; /* How long should I wait after 29 unsigned int time_out; /* How long should I wait after
30 sending a NCP request? */ 30 sending a NCP request? */
31 unsigned int retry_count; /* And how often should I retry? */ 31 unsigned int retry_count; /* And how often should I retry? */
32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
33 uid_t uid; 33 kuid_t uid;
34 gid_t gid; 34 kgid_t gid;
35 umode_t file_mode; 35 umode_t file_mode;
36 umode_t dir_mode; 36 umode_t dir_mode;
37 int info_fd; 37 int info_fd;
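
The ncpfs hunks above are one mechanical conversion: the raw uid_t/gid_t fields in ncp_mount_data_kernel become kuid_t/kgid_t, so every boundary crossing goes through the user-namespace helpers. A compressed sketch of the idiom, drawn from the hunks above (illustrative fragments, not an additional hunk; the -EACCES below is only a stand-in for whatever the surrounding code actually returns):

    /* userspace value -> kernel id, rejecting unmapped ids */
    data->uid = make_kuid(current_user_ns(), optint);
    if (!uid_valid(data->uid))
            goto err;

    /* comparisons use the typed helpers instead of == */
    if (!uid_eq(server->m.mounted_uid, current_uid()))
            return -EACCES;

    /* kernel id -> userspace value when printing or copying out */
    seq_printf(seq, ",uid=%u",
               from_kuid_munged(&init_user_ns, server->m.uid));
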
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
1273static struct pnfs_layoutdriver_type blocklayout_type = { 1273static struct pnfs_layoutdriver_type blocklayout_type = {
1274 .id = LAYOUT_BLOCK_VOLUME, 1274 .id = LAYOUT_BLOCK_VOLUME,
1275 .name = "LAYOUT_BLOCK_VOLUME", 1275 .name = "LAYOUT_BLOCK_VOLUME",
1276 .owner = THIS_MODULE,
1276 .read_pagelist = bl_read_pagelist, 1277 .read_pagelist = bl_read_pagelist,
1277 .write_pagelist = bl_write_pagelist, 1278 .write_pagelist = bl_write_pagelist,
1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c89b26bc9759..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206
207 list_for_each_entry(lo, &server->layouts, plh_layouts) {
208 ino = igrab(lo->plh_inode);
209 if (ino)
210 continue;
211 spin_lock(&ino->i_lock);
212 /* Is this layout in the process of being freed? */
213 if (NFS_I(ino)->layout != lo) {
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225 187
226 list_for_each_entry_safe(lo, tmp, 188 if (args->cbl_recall_type == RETURN_FSID)
227 &recall_list, plh_bulk_recall) { 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
228 ino = lo->plh_inode; 190 else
229 spin_lock(&ino->i_lock); 191 stat = pnfs_destroy_layouts_byclid(clp, true);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 192 if (stat != 0)
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) 193 return NFS4ERR_DELAY;
232 rv = NFS4ERR_DELAY; 194 return NFS4ERR_NOMATCHING_LAYOUT;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9f3c66438d0e..84d8eae203a7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -197,7 +197,6 @@ error_0:
197EXPORT_SYMBOL_GPL(nfs_alloc_client); 197EXPORT_SYMBOL_GPL(nfs_alloc_client);
198 198
199#if IS_ENABLED(CONFIG_NFS_V4) 199#if IS_ENABLED(CONFIG_NFS_V4)
200/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
201void nfs_cleanup_cb_ident_idr(struct net *net) 200void nfs_cleanup_cb_ident_idr(struct net *net)
202{ 201{
203 struct nfs_net *nn = net_generic(net, nfs_net_id); 202 struct nfs_net *nn = net_generic(net, nfs_net_id);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
55 flags &= FMODE_READ|FMODE_WRITE; 55 flags &= FMODE_READ|FMODE_WRITE;
56 rcu_read_lock(); 56 rcu_read_lock();
57 delegation = rcu_dereference(NFS_I(inode)->delegation); 57 delegation = rcu_dereference(NFS_I(inode)->delegation);
58 if (delegation != NULL && (delegation->type & flags) == flags) { 58 if (delegation != NULL && (delegation->type & flags) == flags &&
59 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
59 nfs_mark_delegation_referenced(delegation); 60 nfs_mark_delegation_referenced(delegation);
60 ret = 1; 61 ret = 1;
61 } 62 }
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
70 int status = 0; 71 int status = 0;
71 72
72 if (inode->i_flock == NULL) 73 if (inode->i_flock == NULL)
73 goto out; 74 return 0;
74 75
76 if (inode->i_flock == NULL)
77 goto out;
75 /* Protect inode->i_flock using the file locks lock */ 78 /* Protect inode->i_flock using the file locks lock */
76 lock_flocks(); 79 lock_flocks();
77 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 80 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
94{ 97{
95 struct nfs_inode *nfsi = NFS_I(inode); 98 struct nfs_inode *nfsi = NFS_I(inode);
96 struct nfs_open_context *ctx; 99 struct nfs_open_context *ctx;
100 struct nfs4_state_owner *sp;
97 struct nfs4_state *state; 101 struct nfs4_state *state;
102 unsigned int seq;
98 int err; 103 int err;
99 104
100again: 105again:
@@ -109,9 +114,16 @@ again:
109 continue; 114 continue;
110 get_nfs_open_context(ctx); 115 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 116 spin_unlock(&inode->i_lock);
117 sp = state->owner;
118 /* Block nfs4_proc_unlck */
119 mutex_lock(&sp->so_delegreturn_mutex);
120 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
112 err = nfs4_open_delegation_recall(ctx, state, stateid); 121 err = nfs4_open_delegation_recall(ctx, state, stateid);
113 if (err >= 0) 122 if (!err)
114 err = nfs_delegation_claim_locks(ctx, state); 123 err = nfs_delegation_claim_locks(ctx, state);
124 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
125 err = -EAGAIN;
126 mutex_unlock(&sp->so_delegreturn_mutex);
115 put_nfs_open_context(ctx); 127 put_nfs_open_context(ctx);
116 if (err != 0) 128 if (err != 0)
117 return err; 129 return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
182} 194}
183 195
184static struct nfs_delegation * 196static struct nfs_delegation *
197nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
198{
199 struct nfs_delegation *ret = NULL;
200 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
201
202 if (delegation == NULL)
203 goto out;
204 spin_lock(&delegation->lock);
205 if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
206 ret = delegation;
207 spin_unlock(&delegation->lock);
208out:
209 return ret;
210}
211
212static struct nfs_delegation *
213nfs_start_delegation_return(struct nfs_inode *nfsi)
214{
215 struct nfs_delegation *delegation;
216
217 rcu_read_lock();
218 delegation = nfs_start_delegation_return_locked(nfsi);
219 rcu_read_unlock();
220 return delegation;
221}
222
223static void
224nfs_abort_delegation_return(struct nfs_delegation *delegation,
225 struct nfs_client *clp)
226{
227
228 spin_lock(&delegation->lock);
229 clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
230 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
231 spin_unlock(&delegation->lock);
232 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
233}
234
235static struct nfs_delegation *
185nfs_detach_delegation_locked(struct nfs_inode *nfsi, 236nfs_detach_delegation_locked(struct nfs_inode *nfsi,
186 struct nfs_server *server) 237 struct nfs_delegation *delegation,
238 struct nfs_client *clp)
187{ 239{
188 struct nfs_delegation *delegation = 240 struct nfs_delegation *deleg_cur =
189 rcu_dereference_protected(nfsi->delegation, 241 rcu_dereference_protected(nfsi->delegation,
190 lockdep_is_held(&server->nfs_client->cl_lock)); 242 lockdep_is_held(&clp->cl_lock));
191 243
192 if (delegation == NULL) 244 if (deleg_cur == NULL || delegation != deleg_cur)
193 goto nomatch; 245 return NULL;
194 246
195 spin_lock(&delegation->lock); 247 spin_lock(&delegation->lock);
248 set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
196 list_del_rcu(&delegation->super_list); 249 list_del_rcu(&delegation->super_list);
197 delegation->inode = NULL; 250 delegation->inode = NULL;
198 nfsi->delegation_state = 0; 251 nfsi->delegation_state = 0;
199 rcu_assign_pointer(nfsi->delegation, NULL); 252 rcu_assign_pointer(nfsi->delegation, NULL);
200 spin_unlock(&delegation->lock); 253 spin_unlock(&delegation->lock);
201 return delegation; 254 return delegation;
202nomatch:
203 return NULL;
204} 255}
205 256
206static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, 257static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
207 struct nfs_server *server) 258 struct nfs_delegation *delegation,
259 struct nfs_server *server)
208{ 260{
209 struct nfs_client *clp = server->nfs_client; 261 struct nfs_client *clp = server->nfs_client;
210 struct nfs_delegation *delegation;
211 262
212 spin_lock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
213 delegation = nfs_detach_delegation_locked(nfsi, server); 264 delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
214 spin_unlock(&clp->cl_lock); 265 spin_unlock(&clp->cl_lock);
215 return delegation; 266 return delegation;
216} 267}
217 268
269static struct nfs_delegation *
270nfs_inode_detach_delegation(struct inode *inode)
271{
272 struct nfs_inode *nfsi = NFS_I(inode);
273 struct nfs_server *server = NFS_SERVER(inode);
274 struct nfs_delegation *delegation;
275
276 delegation = nfs_start_delegation_return(nfsi);
277 if (delegation == NULL)
278 return NULL;
279 return nfs_detach_delegation(nfsi, delegation, server);
280}
281
218/** 282/**
219 * nfs_inode_set_delegation - set up a delegation on an inode 283 * nfs_inode_set_delegation - set up a delegation on an inode
220 * @inode: inode to which delegation applies 284 * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
268 delegation = NULL; 332 delegation = NULL;
269 goto out; 333 goto out;
270 } 334 }
271 freeme = nfs_detach_delegation_locked(nfsi, server); 335 freeme = nfs_detach_delegation_locked(nfsi,
336 old_delegation, clp);
337 if (freeme == NULL)
338 goto out;
272 } 339 }
273 list_add_rcu(&delegation->super_list, &server->delegations); 340 list_add_rcu(&delegation->super_list, &server->delegations);
274 nfsi->delegation_state = delegation->type; 341 nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
292/* 359/*
293 * Basic procedure for returning a delegation to the server 360 * Basic procedure for returning a delegation to the server
294 */ 361 */
295static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 362static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
296{ 363{
364 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
297 struct nfs_inode *nfsi = NFS_I(inode); 365 struct nfs_inode *nfsi = NFS_I(inode);
298 int err; 366 int err;
299 367
300 /* 368 if (delegation == NULL)
301 * Guard against new delegated open/lock/unlock calls and against 369 return 0;
302 * state recovery 370 do {
303 */ 371 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
304 down_write(&nfsi->rwsem); 372 if (!issync || err != -EAGAIN)
305 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 373 break;
306 up_write(&nfsi->rwsem); 374 /*
307 if (err) 375 * Guard against state recovery
376 */
377 err = nfs4_wait_clnt_recover(clp);
378 } while (err == 0);
379
380 if (err) {
381 nfs_abort_delegation_return(delegation, clp);
382 goto out;
383 }
384 if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
308 goto out; 385 goto out;
309 386
310 err = nfs_do_return_delegation(inode, delegation, issync); 387 err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
340 inode = nfs_delegation_grab_inode(delegation); 417 inode = nfs_delegation_grab_inode(delegation);
341 if (inode == NULL) 418 if (inode == NULL)
342 continue; 419 continue;
343 delegation = nfs_detach_delegation(NFS_I(inode), 420 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
344 server);
345 rcu_read_unlock(); 421 rcu_read_unlock();
346 422
347 if (delegation != NULL) 423 err = nfs_end_delegation_return(inode, delegation, 0);
348 err = __nfs_inode_return_delegation(inode,
349 delegation, 0);
350 iput(inode); 424 iput(inode);
351 if (!err) 425 if (!err)
352 goto restart; 426 goto restart;
@@ -367,15 +441,11 @@ restart:
367 */ 441 */
368void nfs_inode_return_delegation_noreclaim(struct inode *inode) 442void nfs_inode_return_delegation_noreclaim(struct inode *inode)
369{ 443{
370 struct nfs_server *server = NFS_SERVER(inode);
371 struct nfs_inode *nfsi = NFS_I(inode);
372 struct nfs_delegation *delegation; 444 struct nfs_delegation *delegation;
373 445
374 if (rcu_access_pointer(nfsi->delegation) != NULL) { 446 delegation = nfs_inode_detach_delegation(inode);
375 delegation = nfs_detach_delegation(nfsi, server); 447 if (delegation != NULL)
376 if (delegation != NULL) 448 nfs_do_return_delegation(inode, delegation, 0);
377 nfs_do_return_delegation(inode, delegation, 0);
378 }
379} 449}
380 450
381/** 451/**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
390 */ 460 */
391int nfs4_inode_return_delegation(struct inode *inode) 461int nfs4_inode_return_delegation(struct inode *inode)
392{ 462{
393 struct nfs_server *server = NFS_SERVER(inode);
394 struct nfs_inode *nfsi = NFS_I(inode); 463 struct nfs_inode *nfsi = NFS_I(inode);
395 struct nfs_delegation *delegation; 464 struct nfs_delegation *delegation;
396 int err = 0; 465 int err = 0;
397 466
398 nfs_wb_all(inode); 467 nfs_wb_all(inode);
399 if (rcu_access_pointer(nfsi->delegation) != NULL) { 468 delegation = nfs_start_delegation_return(nfsi);
400 delegation = nfs_detach_delegation(nfsi, server); 469 if (delegation != NULL)
401 if (delegation != NULL) { 470 err = nfs_end_delegation_return(inode, delegation, 1);
402 err = __nfs_inode_return_delegation(inode, delegation, 1);
403 }
404 }
405 return err; 471 return err;
406} 472}
407 473
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
471{ 537{
472 struct nfs_delegation *delegation; 538 struct nfs_delegation *delegation;
473 539
474 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); 540 delegation = nfs_inode_detach_delegation(inode);
475 if (delegation) { 541 if (delegation) {
476 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 542 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
477 nfs_free_delegation(delegation); 543 nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
649 if (inode == NULL) 715 if (inode == NULL)
650 continue; 716 continue;
651 delegation = nfs_detach_delegation(NFS_I(inode), 717 delegation = nfs_detach_delegation(NFS_I(inode),
652 server); 718 delegation, server);
653 rcu_read_unlock(); 719 rcu_read_unlock();
654 720
655 if (delegation != NULL) 721 if (delegation != NULL)
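
The fs/nfs/delegation.c hunks restructure delegation return around the new NFS_DELEGATION_RETURNING flag: a return is first started (marking the delegation), then completed or aborted, instead of detaching the delegation up front. A condensed sketch of the flow as used by nfs4_inode_return_delegation() above (illustrative only):

    delegation = nfs_start_delegation_return(nfsi);   /* sets RETURNING, or NULL */
    if (delegation != NULL)
            err = nfs_end_delegation_return(inode, delegation, 1 /* issync */);

    /* inside nfs_end_delegation_return(): claim opens/locks, retrying
     * through nfs4_wait_clnt_recover() on -EAGAIN; on failure call
     * nfs_abort_delegation_return(), which clears RETURNING, re-sets
     * NFS_DELEGATION_RETURN and flags NFS4CLNT_DELEGRETURN so the state
     * manager retries later; on success detach the delegation and hand
     * it back to the server. */

nfs4_have_delegation() correspondingly refuses to treat a delegation marked RETURNING as usable.
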
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0, 29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN, 30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED, 31 NFS_DELEGATION_REFERENCED,
32 NFS_DELEGATION_RETURNING,
32}; 33};
33 34
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32e6c53520e2..f23f455be42b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -281,7 +281,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
281 281
282 for (i = 0; i < array->size; i++) { 282 for (i = 0; i < array->size; i++) {
283 if (array->array[i].cookie == *desc->dir_cookie) { 283 if (array->array[i].cookie == *desc->dir_cookie) {
284 struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); 284 struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
285 struct nfs_open_dir_context *ctx = desc->file->private_data; 285 struct nfs_open_dir_context *ctx = desc->file->private_data;
286 286
287 new_pos = desc->current_index + i; 287 new_pos = desc->current_index + i;
@@ -629,7 +629,7 @@ out:
629static 629static
630int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) 630int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
631{ 631{
632 struct inode *inode = desc->file->f_path.dentry->d_inode; 632 struct inode *inode = file_inode(desc->file);
633 int ret; 633 int ret;
634 634
635 ret = nfs_readdir_xdr_to_array(desc, page, inode); 635 ret = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -660,7 +660,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
660static 660static
661struct page *get_cache_page(nfs_readdir_descriptor_t *desc) 661struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
662{ 662{
663 return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, 663 return read_cache_page(file_inode(desc->file)->i_mapping,
664 desc->page_index, (filler_t *)nfs_readdir_filler, desc); 664 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
665} 665}
666 666
@@ -764,7 +764,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
764{ 764{
765 struct page *page = NULL; 765 struct page *page = NULL;
766 int status; 766 int status;
767 struct inode *inode = desc->file->f_path.dentry->d_inode; 767 struct inode *inode = file_inode(desc->file);
768 struct nfs_open_dir_context *ctx = desc->file->private_data; 768 struct nfs_open_dir_context *ctx = desc->file->private_data;
769 769
770 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 770 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
@@ -1136,6 +1136,45 @@ out_error:
1136} 1136}
1137 1137
1138/* 1138/*
1139 * A weaker form of d_revalidate for revalidating just the dentry->d_inode
1140 * when we don't really care about the dentry name. This is called when a
1141 * pathwalk ends on a dentry that was not found via a normal lookup in the
1142 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1143 *
1144 * In this situation, we just want to verify that the inode itself is OK
1145 * since the dentry might have changed on the server.
1146 */
1147static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1148{
1149 int error;
1150 struct inode *inode = dentry->d_inode;
1151
1152 /*
1153 * I believe we can only get a negative dentry here in the case of a
1154 * procfs-style symlink. Just assume it's correct for now, but we may
1155 * eventually need to do something more here.
1156 */
1157 if (!inode) {
1158 dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
1159 __func__, dentry->d_parent->d_name.name,
1160 dentry->d_name.name);
1161 return 1;
1162 }
1163
1164 if (is_bad_inode(inode)) {
1165 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
1166 __func__, dentry->d_parent->d_name.name,
1167 dentry->d_name.name);
1168 return 0;
1169 }
1170
1171 error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
1172 dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1173 __func__, inode->i_ino, error ? "invalid" : "valid");
1174 return !error;
1175}
1176
1177/*
1139 * This is called from dput() when d_count is going to 0. 1178 * This is called from dput() when d_count is going to 0.
1140 */ 1179 */
1141static int nfs_dentry_delete(const struct dentry *dentry) 1180static int nfs_dentry_delete(const struct dentry *dentry)
@@ -1202,6 +1241,7 @@ static void nfs_d_release(struct dentry *dentry)
1202 1241
1203const struct dentry_operations nfs_dentry_operations = { 1242const struct dentry_operations nfs_dentry_operations = {
1204 .d_revalidate = nfs_lookup_revalidate, 1243 .d_revalidate = nfs_lookup_revalidate,
1244 .d_weak_revalidate = nfs_weak_revalidate,
1205 .d_delete = nfs_dentry_delete, 1245 .d_delete = nfs_dentry_delete,
1206 .d_iput = nfs_dentry_iput, 1246 .d_iput = nfs_dentry_iput,
1207 .d_automount = nfs_d_automount, 1247 .d_automount = nfs_d_automount,
@@ -2153,12 +2193,16 @@ static int nfs_open_permission_mask(int openflags)
2153{ 2193{
2154 int mask = 0; 2194 int mask = 0;
2155 2195
2156 if ((openflags & O_ACCMODE) != O_WRONLY) 2196 if (openflags & __FMODE_EXEC) {
2157 mask |= MAY_READ; 2197 /* ONLY check exec rights */
2158 if ((openflags & O_ACCMODE) != O_RDONLY) 2198 mask = MAY_EXEC;
2159 mask |= MAY_WRITE; 2199 } else {
2160 if (openflags & __FMODE_EXEC) 2200 if ((openflags & O_ACCMODE) != O_WRONLY)
2161 mask |= MAY_EXEC; 2201 mask |= MAY_READ;
2202 if ((openflags & O_ACCMODE) != O_RDONLY)
2203 mask |= MAY_WRITE;
2204 }
2205
2162 return mask; 2206 return mask;
2163} 2207}
2164 2208
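
Two behavioural points fall out of the fs/nfs/dir.c hunks above: the new d_weak_revalidate op revalidates just the inode when a path walk ends on a dentry that was not found via normal lookup, and nfs_open_permission_mask() now treats exec-only opens specially. A short worked example of the reworked mask, derived from the code above (illustrative only):

    /*
     *   open(path, O_RDONLY)               -> MAY_READ
     *   open(path, O_WRONLY)               -> MAY_WRITE
     *   open(path, O_RDWR)                 -> MAY_READ | MAY_WRITE
     *   open for exec (__FMODE_EXEC set)   -> MAY_EXEC only,
     *                                         read/write bits not added
     */
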
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3c2b893665ba..29f4a48a0ee6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -292,7 +292,7 @@ static int
292nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) 292nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
293{ 293{
294 int ret; 294 int ret;
295 struct inode *inode = file->f_path.dentry->d_inode; 295 struct inode *inode = file_inode(file);
296 296
297 do { 297 do {
298 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 298 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
126 } 126 }
127 spin_unlock(&ret->d_lock); 127 spin_unlock(&ret->d_lock);
128out: 128out:
129 if (name) 129 kfree(name);
130 kfree(name);
131 nfs_free_fattr(fsinfo.fattr); 130 nfs_free_fattr(fsinfo.fattr);
132 return ret; 131 return ret;
133} 132}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index bc3968fa81e5..dc0f98dfa717 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -97,7 +97,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) 97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
98{ 98{
99 struct nfs4_string *owner = fattr->owner_name; 99 struct nfs4_string *owner = fattr->owner_name;
100 __u32 uid; 100 kuid_t uid;
101 101
102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) 102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
103 return false; 103 return false;
@@ -111,7 +111,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr
111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) 111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
112{ 112{
113 struct nfs4_string *group = fattr->group_name; 113 struct nfs4_string *group = fattr->group_name;
114 __u32 gid; 114 kgid_t gid;
115 115
116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) 116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
117 return false; 117 return false;
@@ -193,7 +193,8 @@ static int nfs_idmap_init_keyring(void)
193 if (!cred) 193 if (!cred)
194 return -ENOMEM; 194 return -ENOMEM;
195 195
196 keyring = keyring_alloc(".id_resolver", 0, 0, cred, 196 keyring = keyring_alloc(".id_resolver",
197 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
197 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 198 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
198 KEY_USR_VIEW | KEY_USR_READ, 199 KEY_USR_VIEW | KEY_USR_READ,
199 KEY_ALLOC_NOT_IN_QUOTA, NULL); 200 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -764,7 +765,7 @@ out:
764static ssize_t 765static ssize_t
765idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 766idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
766{ 767{
767 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 768 struct rpc_inode *rpci = RPC_I(file_inode(filp));
768 struct idmap *idmap = (struct idmap *)rpci->private; 769 struct idmap *idmap = (struct idmap *)rpci->private;
769 struct key_construction *cons; 770 struct key_construction *cons;
770 struct idmap_msg im; 771 struct idmap_msg im;
@@ -836,43 +837,61 @@ idmap_release_pipe(struct inode *inode)
836 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); 837 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
837} 838}
838 839
839int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 840int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
840{ 841{
841 struct idmap *idmap = server->nfs_client->cl_idmap; 842 struct idmap *idmap = server->nfs_client->cl_idmap;
843 __u32 id = -1;
844 int ret = 0;
842 845
843 if (nfs_map_string_to_numeric(name, namelen, uid)) 846 if (!nfs_map_string_to_numeric(name, namelen, &id))
844 return 0; 847 ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
845 return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); 848 if (ret == 0) {
849 *uid = make_kuid(&init_user_ns, id);
850 if (!uid_valid(*uid))
851 ret = -ERANGE;
852 }
853 return ret;
846} 854}
847 855
848int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) 856int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
849{ 857{
850 struct idmap *idmap = server->nfs_client->cl_idmap; 858 struct idmap *idmap = server->nfs_client->cl_idmap;
859 __u32 id = -1;
860 int ret = 0;
851 861
852 if (nfs_map_string_to_numeric(name, namelen, gid)) 862 if (!nfs_map_string_to_numeric(name, namelen, &id))
853 return 0; 863 ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
854 return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); 864 if (ret == 0) {
865 *gid = make_kgid(&init_user_ns, id);
866 if (!gid_valid(*gid))
867 ret = -ERANGE;
868 }
869 return ret;
855} 870}
856 871
857int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 872int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
858{ 873{
859 struct idmap *idmap = server->nfs_client->cl_idmap; 874 struct idmap *idmap = server->nfs_client->cl_idmap;
860 int ret = -EINVAL; 875 int ret = -EINVAL;
876 __u32 id;
861 877
878 id = from_kuid(&init_user_ns, uid);
862 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 879 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
863 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); 880 ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
864 if (ret < 0) 881 if (ret < 0)
865 ret = nfs_map_numeric_to_string(uid, buf, buflen); 882 ret = nfs_map_numeric_to_string(id, buf, buflen);
866 return ret; 883 return ret;
867} 884}
868int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) 885int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
869{ 886{
870 struct idmap *idmap = server->nfs_client->cl_idmap; 887 struct idmap *idmap = server->nfs_client->cl_idmap;
871 int ret = -EINVAL; 888 int ret = -EINVAL;
889 __u32 id;
872 890
891 id = from_kgid(&init_user_ns, gid);
873 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 892 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
874 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); 893 ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
875 if (ret < 0) 894 if (ret < 0)
876 ret = nfs_map_numeric_to_string(gid, buf, buflen); 895 ret = nfs_map_numeric_to_string(id, buf, buflen);
877 return ret; 896 return ret;
878} 897}
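
The fs/nfs/idmap.c hunks switch the mapping functions over to kuid_t/kgid_t: a name coming off the wire is first tried as a plain numeric string, then via the id_resolver upcall, and the resulting raw id is converted with make_kuid()/make_kgid() against &init_user_ns, with unmapped ids rejected. A compressed sketch of the uid path, taken from nfs_map_name_to_uid() above (illustrative only):

    __u32 id = -1;
    int ret = 0;

    if (!nfs_map_string_to_numeric(name, namelen, &id))          /* e.g. "65534" */
            ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
    if (ret == 0) {
            *uid = make_kuid(&init_user_ns, id);                  /* raw id -> kuid_t */
            if (!uid_valid(*uid))
                    ret = -ERANGE;                                 /* unmapped id */
    }
    return ret;

The reverse direction (nfs_map_uid_to_name()) does the symmetric from_kuid(&init_user_ns, uid) before consulting the idmapper or falling back to a numeric string.
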
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..b586fe9af475 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -332,8 +332,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
332 inode->i_version = 0; 332 inode->i_version = 0;
333 inode->i_size = 0; 333 inode->i_size = 0;
334 clear_nlink(inode); 334 clear_nlink(inode);
335 inode->i_uid = -2; 335 inode->i_uid = make_kuid(&init_user_ns, -2);
336 inode->i_gid = -2; 336 inode->i_gid = make_kgid(&init_user_ns, -2);
337 inode->i_blocks = 0; 337 inode->i_blocks = 0;
338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
339 nfsi->write_io = 0; 339 nfsi->write_io = 0;
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
694 if (ctx->cred != NULL) 694 if (ctx->cred != NULL)
695 put_rpccred(ctx->cred); 695 put_rpccred(ctx->cred);
696 dput(ctx->dentry); 696 dput(ctx->dentry);
697 if (is_sync) 697 nfs_sb_deactive(sb);
698 nfs_sb_deactive(sb);
699 else
700 nfs_sb_deactive_async(sb);
701 kfree(ctx->mdsthreshold); 698 kfree(ctx->mdsthreshold);
702 kfree(ctx); 699 kfree(ctx);
703} 700}
@@ -714,7 +711,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
714 */ 711 */
715void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 712void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
716{ 713{
717 struct inode *inode = filp->f_path.dentry->d_inode; 714 struct inode *inode = file_inode(filp);
718 struct nfs_inode *nfsi = NFS_I(inode); 715 struct nfs_inode *nfsi = NFS_I(inode);
719 716
720 filp->private_data = get_nfs_open_context(ctx); 717 filp->private_data = get_nfs_open_context(ctx);
@@ -747,7 +744,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
747 744
748static void nfs_file_clear_open_context(struct file *filp) 745static void nfs_file_clear_open_context(struct file *filp)
749{ 746{
750 struct inode *inode = filp->f_path.dentry->d_inode; 747 struct inode *inode = file_inode(filp);
751 struct nfs_open_context *ctx = nfs_file_open_context(filp); 748 struct nfs_open_context *ctx = nfs_file_open_context(filp);
752 749
753 if (ctx) { 750 if (ctx) {
@@ -1009,9 +1006,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1009 /* Have any file permissions changed? */ 1006 /* Have any file permissions changed? */
1010 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) 1007 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
1011 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1008 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1012 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) 1009 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
1013 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1010 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1014 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) 1011 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
1015 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1012 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1016 1013
1017 /* Has the link count changed? */ 1014 /* Has the link count changed? */
@@ -1440,7 +1437,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1440 | NFS_INO_REVAL_FORCED); 1437 | NFS_INO_REVAL_FORCED);
1441 1438
1442 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1439 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1443 if (inode->i_uid != fattr->uid) { 1440 if (!uid_eq(inode->i_uid, fattr->uid)) {
1444 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1441 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1445 inode->i_uid = fattr->uid; 1442 inode->i_uid = fattr->uid;
1446 } 1443 }
@@ -1451,7 +1448,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1451 | NFS_INO_REVAL_FORCED); 1448 | NFS_INO_REVAL_FORCED);
1452 1449
1453 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1450 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1454 if (inode->i_gid != fattr->gid) { 1451 if (!gid_eq(inode->i_gid, fattr->gid)) {
1455 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1452 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1456 inode->i_gid = fattr->gid; 1453 inode->i_gid = fattr->gid;
1457 } 1454 }
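
The inode.c hunks above switch ownership handling to the type-safe id helpers: the "unknown owner" value -2 is wrapped with make_kuid()/make_kgid(), and comparisons go through uid_eq()/gid_eq() because kuid_t and kgid_t cannot be compared with ==. A minimal sketch of both patterns, assuming init_user_ns (the helpers below are illustrative, not NFS functions):

#include <linux/fs.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Sketch: detect an ownership change with the type-safe comparators. */
static bool example_owner_changed(const struct inode *inode,
                                  kuid_t new_uid, kgid_t new_gid)
{
        return !uid_eq(inode->i_uid, new_uid) ||
               !gid_eq(inode->i_gid, new_gid);
}

/* Sketch: the "owner unknown" placeholder used when an inode is reset. */
static void example_set_nobody(struct inode *inode)
{
        inode->i_uid = make_kuid(&init_user_ns, -2);
        inode->i_gid = make_kgid(&init_user_ns, -2);
}
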
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
329extern void __exit unregister_nfs_fs(void); 329extern void __exit unregister_nfs_fs(void);
330extern void nfs_sb_active(struct super_block *sb); 330extern void nfs_sb_active(struct super_block *sb);
331extern void nfs_sb_deactive(struct super_block *sb); 331extern void nfs_sb_deactive(struct super_block *sb);
332extern void nfs_sb_deactive_async(struct super_block *sb);
333 332
334/* namespace.c */ 333/* namespace.c */
335#define NFS_PATH_CANONICAL 1 334#define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
177 return mnt; 177 return mnt;
178} 178}
179 179
180static int
181nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
182{
183 if (NFS_FH(dentry->d_inode)->size != 0)
184 return nfs_getattr(mnt, dentry, stat);
185 generic_fillattr(dentry->d_inode, stat);
186 return 0;
187}
188
189static int
190nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
191{
192 if (NFS_FH(dentry->d_inode)->size != 0)
193 return nfs_setattr(dentry, attr);
194 return -EACCES;
195}
196
180const struct inode_operations nfs_mountpoint_inode_operations = { 197const struct inode_operations nfs_mountpoint_inode_operations = {
181 .getattr = nfs_getattr, 198 .getattr = nfs_getattr,
199 .setattr = nfs_setattr,
182}; 200};
183 201
184const struct inode_operations nfs_referral_inode_operations = { 202const struct inode_operations nfs_referral_inode_operations = {
203 .getattr = nfs_namespace_getattr,
204 .setattr = nfs_namespace_setattr,
185}; 205};
186 206
187static void nfs_expire_automounts(struct work_struct *work) 207static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 06b9df49f7f7..62db136339ea 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -290,8 +290,13 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
290 290
291 fattr->mode = be32_to_cpup(p++); 291 fattr->mode = be32_to_cpup(p++);
292 fattr->nlink = be32_to_cpup(p++); 292 fattr->nlink = be32_to_cpup(p++);
293 fattr->uid = be32_to_cpup(p++); 293 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
294 fattr->gid = be32_to_cpup(p++); 294 if (!uid_valid(fattr->uid))
295 goto out_uid;
296 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
297 if (!gid_valid(fattr->gid))
298 goto out_gid;
299
295 fattr->size = be32_to_cpup(p++); 300 fattr->size = be32_to_cpup(p++);
296 fattr->du.nfs2.blocksize = be32_to_cpup(p++); 301 fattr->du.nfs2.blocksize = be32_to_cpup(p++);
297 302
@@ -313,6 +318,12 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); 318 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
314 319
315 return 0; 320 return 0;
321out_uid:
322 dprintk("NFS: returned invalid uid\n");
323 return -EINVAL;
324out_gid:
325 dprintk("NFS: returned invalid gid\n");
326 return -EINVAL;
316out_overflow: 327out_overflow:
317 print_overflow_msg(__func__, xdr); 328 print_overflow_msg(__func__, xdr);
318 return -EIO; 329 return -EIO;
@@ -351,11 +362,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
351 else 362 else
352 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 363 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
353 if (attr->ia_valid & ATTR_UID) 364 if (attr->ia_valid & ATTR_UID)
354 *p++ = cpu_to_be32(attr->ia_uid); 365 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
355 else 366 else
356 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 367 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
357 if (attr->ia_valid & ATTR_GID) 368 if (attr->ia_valid & ATTR_GID)
358 *p++ = cpu_to_be32(attr->ia_gid); 369 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
359 else 370 else
360 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 371 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
361 if (attr->ia_valid & ATTR_SIZE) 372 if (attr->ia_valid & ATTR_SIZE)
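
Besides wrapping the decoded ids, the nfs2xdr.c change rejects replies whose uid or gid cannot be represented: make_kuid()/make_kgid() may yield an invalid id, which uid_valid()/gid_valid() detect, and the decoder then fails with -EINVAL. A minimal sketch of that decode-and-validate step (the function below is illustrative; the real decoder also advances the XDR stream and fills in the rest of the fattr):

#include <linux/uidgid.h>
#include <linux/user_namespace.h>
#include <linux/errno.h>

/* Sketch: convert two on-the-wire ids and fail if either does not map
 * into the kernel's internal id space. */
static int example_decode_ids(__u32 wire_uid, __u32 wire_gid,
                              kuid_t *uid, kgid_t *gid)
{
        *uid = make_kuid(&init_user_ns, wire_uid);
        if (!uid_valid(*uid))
                return -EINVAL;
        *gid = make_kgid(&init_user_ns, wire_gid);
        if (!gid_valid(*gid))
                return -EINVAL;
        return 0;
}
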
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 70efb63b1e42..43ea96ced28c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -872,7 +872,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
872static int 872static int
873nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) 873nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
874{ 874{
875 struct inode *inode = filp->f_path.dentry->d_inode; 875 struct inode *inode = file_inode(filp);
876 876
877 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 877 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
878} 878}
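
Several hunks in this series (nfs3proc.c, nfs4file.c, proc.c, inode.c) replace the open-coded filp->f_path.dentry->d_inode chain with the file_inode() accessor; behaviour is unchanged, the helper simply names the operation. A one-line sketch:

#include <linux/fs.h>

/* Sketch: preferred spelling of "the inode behind this struct file". */
static struct inode *example_inode_of(struct file *filp)
{
        return file_inode(filp);
}
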
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index bffc32406fbf..fa6d72131c19 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -592,13 +592,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
592 592
593 if (attr->ia_valid & ATTR_UID) { 593 if (attr->ia_valid & ATTR_UID) {
594 *p++ = xdr_one; 594 *p++ = xdr_one;
595 *p++ = cpu_to_be32(attr->ia_uid); 595 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
596 } else 596 } else
597 *p++ = xdr_zero; 597 *p++ = xdr_zero;
598 598
599 if (attr->ia_valid & ATTR_GID) { 599 if (attr->ia_valid & ATTR_GID) {
600 *p++ = xdr_one; 600 *p++ = xdr_one;
601 *p++ = cpu_to_be32(attr->ia_gid); 601 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
602 } else 602 } else
603 *p++ = xdr_zero; 603 *p++ = xdr_zero;
604 604
@@ -657,8 +657,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
657 657
658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode; 658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
659 fattr->nlink = be32_to_cpup(p++); 659 fattr->nlink = be32_to_cpup(p++);
660 fattr->uid = be32_to_cpup(p++); 660 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
661 fattr->gid = be32_to_cpup(p++); 661 if (!uid_valid(fattr->uid))
662 goto out_uid;
663 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
664 if (!gid_valid(fattr->gid))
665 goto out_gid;
662 666
663 p = xdr_decode_size3(p, &fattr->size); 667 p = xdr_decode_size3(p, &fattr->size);
664 p = xdr_decode_size3(p, &fattr->du.nfs3.used); 668 p = xdr_decode_size3(p, &fattr->du.nfs3.used);
@@ -675,6 +679,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 679
676 fattr->valid |= NFS_ATTR_FATTR_V3; 680 fattr->valid |= NFS_ATTR_FATTR_V3;
677 return 0; 681 return 0;
682out_uid:
683 dprintk("NFS: returned invalid uid\n");
684 return -EINVAL;
685out_gid:
686 dprintk("NFS: returned invalid gid\n");
687 return -EINVAL;
678out_overflow: 688out_overflow:
679 print_overflow_msg(__func__, xdr); 689 print_overflow_msg(__func__, xdr);
680 return -EIO; 690 return -EIO;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
13 13
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 14#define NFS4_MAX_LOOP_ON_RECOVER (10)
15 15
16#include <linux/seqlock.h>
17
16struct idmap; 18struct idmap;
17 19
18enum nfs4_client_state { 20enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
90 unsigned long so_flags; 92 unsigned long so_flags;
91 struct list_head so_states; 93 struct list_head so_states;
92 struct nfs_seqid_counter so_seqid; 94 struct nfs_seqid_counter so_seqid;
95 seqcount_t so_reclaim_seqcount;
96 struct mutex so_delegreturn_mutex;
93}; 97};
94 98
95enum { 99enum {
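
nfs4_fs.h adds two fields to the state owner: a seqcount that the reclaim path holds open while it reopens state, and a mutex that serialises delegation return against lock and unlock requests; both are initialised in nfs4_alloc_state_owner() later in this series. A minimal sketch of the initialisation, using a cut-down stand-in structure rather than the real nfs4_state_owner:

#include <linux/seqlock.h>
#include <linux/mutex.h>

/* Sketch: stripped-down "state owner" carrying only the new fields. */
struct example_state_owner {
        seqcount_t      reclaim_seqcount;       /* odd while reclaim runs */
        struct mutex    delegreturn_mutex;      /* excludes delegation return */
};

static void example_owner_init(struct example_state_owner *sp)
{
        seqcount_init(&sp->reclaim_seqcount);
        mutex_init(&sp->delegreturn_mutex);
}
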
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e5f2fad14ff8..ac4fc9a8fdbc 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -30,15 +30,14 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
30 30
31 if (clp->rpc_ops->version != 4 || minorversion != 0) 31 if (clp->rpc_ops->version != 4 || minorversion != 0)
32 return ret; 32 return ret;
33retry: 33 idr_preload(GFP_KERNEL);
34 if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
35 return -ENOMEM;
36 spin_lock(&nn->nfs_client_lock); 34 spin_lock(&nn->nfs_client_lock);
37 ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); 35 ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
36 if (ret >= 0)
37 clp->cl_cb_ident = ret;
38 spin_unlock(&nn->nfs_client_lock); 38 spin_unlock(&nn->nfs_client_lock);
39 if (ret == -EAGAIN) 39 idr_preload_end();
40 goto retry; 40 return ret < 0 ? ret : 0;
41 return ret;
42} 41}
43 42
44#ifdef CONFIG_NFS_V4_1 43#ifdef CONFIG_NFS_V4_1
@@ -237,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
237 error = nfs4_discover_server_trunking(clp, &old); 236 error = nfs4_discover_server_trunking(clp, &old);
238 if (error < 0) 237 if (error < 0)
239 goto error; 238 goto error;
239 nfs_put_client(clp);
240 if (clp != old) { 240 if (clp != old) {
241 clp->cl_preserve_clid = true; 241 clp->cl_preserve_clid = true;
242 nfs_put_client(clp);
243 clp = old; 242 clp = old;
244 atomic_inc(&clp->cl_count);
245 } 243 }
246 244
247 return clp; 245 return clp;
@@ -307,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
307 .clientid = new->cl_clientid, 305 .clientid = new->cl_clientid,
308 .confirm = new->cl_confirm, 306 .confirm = new->cl_confirm,
309 }; 307 };
310 int status; 308 int status = -NFS4ERR_STALE_CLIENTID;
311 309
312 spin_lock(&nn->nfs_client_lock); 310 spin_lock(&nn->nfs_client_lock);
313 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -333,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
333 331
334 if (prev) 332 if (prev)
335 nfs_put_client(prev); 333 nfs_put_client(prev);
334 prev = pos;
336 335
337 status = nfs4_proc_setclientid_confirm(pos, &clid, cred); 336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
338 if (status == 0) { 337 switch (status) {
338 case -NFS4ERR_STALE_CLIENTID:
339 break;
340 case 0:
339 nfs4_swap_callback_idents(pos, new); 341 nfs4_swap_callback_idents(pos, new);
340 342
341 nfs_put_client(pos); 343 prev = NULL;
342 *result = pos; 344 *result = pos;
343 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 345 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
344 __func__, pos, atomic_read(&pos->cl_count)); 346 __func__, pos, atomic_read(&pos->cl_count));
345 return 0; 347 default:
346 } 348 goto out;
347 if (status != -NFS4ERR_STALE_CLIENTID) {
348 nfs_put_client(pos);
349 dprintk("NFS: <-- %s status = %d, no result\n",
350 __func__, status);
351 return status;
352 } 349 }
353 350
354 spin_lock(&nn->nfs_client_lock); 351 spin_lock(&nn->nfs_client_lock);
355 prev = pos;
356 } 352 }
353 spin_unlock(&nn->nfs_client_lock);
357 354
358 /* 355 /* No match found. The server lost our clientid */
359 * No matching nfs_client found. This should be impossible, 356out:
360 * because the new nfs_client has already been added to
361 * nfs_client_list by nfs_get_client().
362 *
363 * Don't BUG(), since the caller is holding a mutex.
364 */
365 if (prev) 357 if (prev)
366 nfs_put_client(prev); 358 nfs_put_client(prev);
367 spin_unlock(&nn->nfs_client_lock); 359 dprintk("NFS: <-- %s status = %d\n", __func__, status);
368 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 360 return status;
369 return -NFS4ERR_STALE_CLIENTID;
370} 361}
371 362
372#ifdef CONFIG_NFS_V4_1 363#ifdef CONFIG_NFS_V4_1
@@ -433,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
433{ 424{
434 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); 425 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
435 struct nfs_client *pos, *n, *prev = NULL; 426 struct nfs_client *pos, *n, *prev = NULL;
436 int error; 427 int status = -NFS4ERR_STALE_CLIENTID;
437 428
438 spin_lock(&nn->nfs_client_lock); 429 spin_lock(&nn->nfs_client_lock);
439 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 430 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -449,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
449 nfs_put_client(prev); 440 nfs_put_client(prev);
450 prev = pos; 441 prev = pos;
451 442
452 error = nfs_wait_client_init_complete(pos); 443 nfs4_schedule_lease_recovery(pos);
453 if (error < 0) { 444 status = nfs_wait_client_init_complete(pos);
445 if (status < 0) {
454 nfs_put_client(pos); 446 nfs_put_client(pos);
455 spin_lock(&nn->nfs_client_lock); 447 spin_lock(&nn->nfs_client_lock);
456 continue; 448 continue;
457 } 449 }
458 450 status = pos->cl_cons_state;
459 spin_lock(&nn->nfs_client_lock); 451 spin_lock(&nn->nfs_client_lock);
452 if (status < 0)
453 continue;
460 } 454 }
461 455
462 if (pos->rpc_ops != new->rpc_ops) 456 if (pos->rpc_ops != new->rpc_ops)
@@ -474,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
474 if (!nfs4_match_serverowners(pos, new)) 468 if (!nfs4_match_serverowners(pos, new))
475 continue; 469 continue;
476 470
471 atomic_inc(&pos->cl_count);
477 spin_unlock(&nn->nfs_client_lock); 472 spin_unlock(&nn->nfs_client_lock);
478 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 473 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
479 __func__, pos, atomic_read(&pos->cl_count)); 474 __func__, pos, atomic_read(&pos->cl_count));
@@ -482,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
482 return 0; 477 return 0;
483 } 478 }
484 479
485 /* 480 /* No matching nfs_client found. */
486 * No matching nfs_client found. This should be impossible,
487 * because the new nfs_client has already been added to
488 * nfs_client_list by nfs_get_client().
489 *
490 * Don't BUG(), since the caller is holding a mutex.
491 */
492 spin_unlock(&nn->nfs_client_lock); 481 spin_unlock(&nn->nfs_client_lock);
493 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 482 dprintk("NFS: <-- %s status = %d\n", __func__, status);
494 return -NFS4ERR_STALE_CLIENTID; 483 return status;
495} 484}
496#endif /* CONFIG_NFS_V4_1 */ 485#endif /* CONFIG_NFS_V4_1 */
497 486
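
nfs4client.c also moves from the old idr_pre_get()/idr_get_new() retry loop to the preload API: idr_preload() preallocates outside the spinlock so that idr_alloc() can succeed with GFP_NOWAIT inside it, and idr_preload_end() closes the preload section. A minimal sketch of the pattern with a caller-supplied idr and lock (names are illustrative):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

/* Sketch: allocate an id for @ptr while holding @lock, as the callback
 * ident allocation above does.  Returns the new id or a negative errno. */
static int example_alloc_id(struct idr *idr, spinlock_t *lock, void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);                /* may sleep; preallocates memory */
        spin_lock(lock);
        id = idr_alloc(idr, ptr, 0, 0, GFP_NOWAIT);     /* 0, 0 = any id >= 0 */
        spin_unlock(lock);
        idr_preload_end();

        return id;
}
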
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 08ddcccb8887..13e6bb3e3fe5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -94,7 +94,7 @@ static int
94nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) 94nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
95{ 95{
96 int ret; 96 int ret;
97 struct inode *inode = file->f_path.dentry->d_inode; 97 struct inode *inode = file_inode(file);
98 98
99 do { 99 do {
100 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 100 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5d864fb36578..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896 return 0; 896 return 0;
897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
898 return 0; 898 return 0;
899 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
900 return 0;
899 nfs_mark_delegation_referenced(delegation); 901 nfs_mark_delegation_referenced(delegation);
900 return 1; 902 return 1;
901} 903}
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
973 975
974 spin_lock(&deleg_cur->lock); 976 spin_lock(&deleg_cur->lock);
975 if (nfsi->delegation != deleg_cur || 977 if (nfsi->delegation != deleg_cur ||
978 test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
976 (deleg_cur->type & fmode) != fmode) 979 (deleg_cur->type & fmode) != fmode)
977 goto no_delegation_unlock; 980 goto no_delegation_unlock;
978 981
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1352 case -NFS4ERR_BAD_HIGH_SLOT: 1355 case -NFS4ERR_BAD_HIGH_SLOT:
1353 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1356 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1354 case -NFS4ERR_DEADSESSION: 1357 case -NFS4ERR_DEADSESSION:
1358 set_bit(NFS_DELEGATED_STATE, &state->flags);
1355 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 1359 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1360 err = -EAGAIN;
1356 goto out; 1361 goto out;
1357 case -NFS4ERR_STALE_CLIENTID: 1362 case -NFS4ERR_STALE_CLIENTID:
1358 case -NFS4ERR_STALE_STATEID: 1363 case -NFS4ERR_STALE_STATEID:
1364 set_bit(NFS_DELEGATED_STATE, &state->flags);
1359 case -NFS4ERR_EXPIRED: 1365 case -NFS4ERR_EXPIRED:
1360 /* Don't recall a delegation if it was lost */ 1366 /* Don't recall a delegation if it was lost */
1361 nfs4_schedule_lease_recovery(server->nfs_client); 1367 nfs4_schedule_lease_recovery(server->nfs_client);
1368 err = -EAGAIN;
1362 goto out; 1369 goto out;
1363 case -ERESTARTSYS:
1364 /*
1365 * The show must go on: exit, but mark the
1366 * stateid as needing recovery.
1367 */
1368 case -NFS4ERR_DELEG_REVOKED: 1370 case -NFS4ERR_DELEG_REVOKED:
1369 case -NFS4ERR_ADMIN_REVOKED: 1371 case -NFS4ERR_ADMIN_REVOKED:
1370 case -NFS4ERR_BAD_STATEID: 1372 case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1375 err = 0; 1377 err = 0;
1376 goto out; 1378 goto out;
1377 } 1379 }
1380 set_bit(NFS_DELEGATED_STATE, &state->flags);
1378 err = nfs4_handle_exception(server, err, &exception); 1381 err = nfs4_handle_exception(server, err, &exception);
1379 } while (exception.retry); 1382 } while (exception.retry);
1380out: 1383out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1463 struct nfs4_state_owner *sp = data->owner; 1466 struct nfs4_state_owner *sp = data->owner;
1464 1467
1465 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 1468 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1466 return; 1469 goto out_wait;
1467 /* 1470 /*
1468 * Check if we still need to send an OPEN call, or if we can use 1471 * Check if we still need to send an OPEN call, or if we can use
1469 * a delegation instead. 1472 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
1498 rcu_read_unlock(); 1501 rcu_read_unlock();
1499out_no_action: 1502out_no_action:
1500 task->tk_action = NULL; 1503 task->tk_action = NULL;
1504out_wait:
1501 nfs4_sequence_done(task, &data->o_res.seq_res); 1505 nfs4_sequence_done(task, &data->o_res.seq_res);
1502} 1506}
1503 1507
@@ -1626,7 +1630,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1626 1630
1627static int nfs4_opendata_access(struct rpc_cred *cred, 1631static int nfs4_opendata_access(struct rpc_cred *cred,
1628 struct nfs4_opendata *opendata, 1632 struct nfs4_opendata *opendata,
1629 struct nfs4_state *state, fmode_t fmode) 1633 struct nfs4_state *state, fmode_t fmode,
1634 int openflags)
1630{ 1635{
1631 struct nfs_access_entry cache; 1636 struct nfs_access_entry cache;
1632 u32 mask; 1637 u32 mask;
@@ -1638,11 +1643,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1638 1643
1639 mask = 0; 1644 mask = 0;
1640 /* don't check MAY_WRITE - a newly created file may not have 1645 /* don't check MAY_WRITE - a newly created file may not have
1641 * write mode bits, but POSIX allows the creating process to write */ 1646 * write mode bits, but POSIX allows the creating process to write.
1642 if (fmode & FMODE_READ) 1647 * use openflags to check for exec, because fmode won't
1643 mask |= MAY_READ; 1648 * always have FMODE_EXEC set when file open for exec. */
1644 if (fmode & FMODE_EXEC) 1649 if (openflags & __FMODE_EXEC) {
1645 mask |= MAY_EXEC; 1650 /* ONLY check for exec rights */
1651 mask = MAY_EXEC;
1652 } else if (fmode & FMODE_READ)
1653 mask = MAY_READ;
1646 1654
1647 cache.cred = cred; 1655 cache.cred = cred;
1648 cache.jiffies = jiffies; 1656 cache.jiffies = jiffies;
@@ -1841,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1841 sattr->ia_valid |= ATTR_MTIME; 1849 sattr->ia_valid |= ATTR_MTIME;
1842} 1850}
1843 1851
1852static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
1853 fmode_t fmode,
1854 int flags,
1855 struct nfs4_state **res)
1856{
1857 struct nfs4_state_owner *sp = opendata->owner;
1858 struct nfs_server *server = sp->so_server;
1859 struct nfs4_state *state;
1860 unsigned int seq;
1861 int ret;
1862
1863 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
1864
1865 ret = _nfs4_proc_open(opendata);
1866 if (ret != 0)
1867 goto out;
1868
1869 state = nfs4_opendata_to_nfs4_state(opendata);
1870 ret = PTR_ERR(state);
1871 if (IS_ERR(state))
1872 goto out;
1873 if (server->caps & NFS_CAP_POSIX_LOCK)
1874 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1875
1876 ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
1877 if (ret != 0)
1878 goto out;
1879
1880 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
1881 nfs4_schedule_stateid_recovery(server, state);
1882 nfs4_wait_clnt_recover(server->nfs_client);
1883 }
1884 *res = state;
1885out:
1886 return ret;
1887}
1888
1844/* 1889/*
1845 * Returns a referenced nfs4_state 1890 * Returns a referenced nfs4_state
1846 */ 1891 */
@@ -1885,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
1885 if (dentry->d_inode != NULL) 1930 if (dentry->d_inode != NULL)
1886 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1931 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1887 1932
1888 status = _nfs4_proc_open(opendata); 1933 status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
1889 if (status != 0)
1890 goto err_opendata_put;
1891
1892 state = nfs4_opendata_to_nfs4_state(opendata);
1893 status = PTR_ERR(state);
1894 if (IS_ERR(state))
1895 goto err_opendata_put;
1896 if (server->caps & NFS_CAP_POSIX_LOCK)
1897 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1898
1899 status = nfs4_opendata_access(cred, opendata, state, fmode);
1900 if (status != 0) 1934 if (status != 0)
1901 goto err_opendata_put; 1935 goto err_opendata_put;
1902 1936
@@ -2084,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
2084 nfs4_put_open_state(calldata->state); 2118 nfs4_put_open_state(calldata->state);
2085 nfs_free_seqid(calldata->arg.seqid); 2119 nfs_free_seqid(calldata->arg.seqid);
2086 nfs4_put_state_owner(sp); 2120 nfs4_put_state_owner(sp);
2087 nfs_sb_deactive_async(sb); 2121 nfs_sb_deactive(sb);
2088 kfree(calldata); 2122 kfree(calldata);
2089} 2123}
2090 2124
@@ -2146,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2146 2180
2147 dprintk("%s: begin!\n", __func__); 2181 dprintk("%s: begin!\n", __func__);
2148 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2182 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
2149 return; 2183 goto out_wait;
2150 2184
2151 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2185 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2152 calldata->arg.fmode = FMODE_READ|FMODE_WRITE; 2186 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2168,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2168 2202
2169 if (!call_close) { 2203 if (!call_close) {
2170 /* Note: exit _without_ calling nfs4_close_done */ 2204 /* Note: exit _without_ calling nfs4_close_done */
2171 task->tk_action = NULL; 2205 goto out_no_action;
2172 nfs4_sequence_done(task, &calldata->res.seq_res);
2173 goto out;
2174 } 2206 }
2175 2207
2176 if (calldata->arg.fmode == 0) { 2208 if (calldata->arg.fmode == 0) {
2177 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2209 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2178 if (calldata->roc && 2210 if (calldata->roc &&
2179 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2211 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2180 goto out; 2212 goto out_wait;
2181 } 2213 }
2182 2214
2183 nfs_fattr_init(calldata->res.fattr); 2215 nfs_fattr_init(calldata->res.fattr);
@@ -2187,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2187 &calldata->res.seq_res, 2219 &calldata->res.seq_res,
2188 task) != 0) 2220 task) != 0)
2189 nfs_release_seqid(calldata->arg.seqid); 2221 nfs_release_seqid(calldata->arg.seqid);
2190out:
2191 dprintk("%s: done!\n", __func__); 2222 dprintk("%s: done!\n", __func__);
2223 return;
2224out_no_action:
2225 task->tk_action = NULL;
2226out_wait:
2227 nfs4_sequence_done(task, &calldata->res.seq_res);
2192} 2228}
2193 2229
2194static const struct rpc_call_ops nfs4_close_ops = { 2230static const struct rpc_call_ops nfs4_close_ops = {
@@ -4419,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4419 struct nfs4_unlockdata *calldata = data; 4455 struct nfs4_unlockdata *calldata = data;
4420 4456
4421 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4457 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4422 return; 4458 goto out_wait;
4423 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { 4459 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4424 /* Note: exit _without_ running nfs4_locku_done */ 4460 /* Note: exit _without_ running nfs4_locku_done */
4425 task->tk_action = NULL; 4461 goto out_no_action;
4426 nfs4_sequence_done(task, &calldata->res.seq_res);
4427 return;
4428 } 4462 }
4429 calldata->timestamp = jiffies; 4463 calldata->timestamp = jiffies;
4430 if (nfs4_setup_sequence(calldata->server, 4464 if (nfs4_setup_sequence(calldata->server,
@@ -4432,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4432 &calldata->res.seq_res, 4466 &calldata->res.seq_res,
4433 task) != 0) 4467 task) != 0)
4434 nfs_release_seqid(calldata->arg.seqid); 4468 nfs_release_seqid(calldata->arg.seqid);
4469 return;
4470out_no_action:
4471 task->tk_action = NULL;
4472out_wait:
4473 nfs4_sequence_done(task, &calldata->res.seq_res);
4435} 4474}
4436 4475
4437static const struct rpc_call_ops nfs4_locku_ops = { 4476static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4478,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4478 4517
4479static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 4518static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
4480{ 4519{
4481 struct nfs_inode *nfsi = NFS_I(state->inode); 4520 struct inode *inode = state->inode;
4521 struct nfs4_state_owner *sp = state->owner;
4522 struct nfs_inode *nfsi = NFS_I(inode);
4482 struct nfs_seqid *seqid; 4523 struct nfs_seqid *seqid;
4483 struct nfs4_lock_state *lsp; 4524 struct nfs4_lock_state *lsp;
4484 struct rpc_task *task; 4525 struct rpc_task *task;
@@ -4488,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
4488 status = nfs4_set_lock_state(state, request); 4529 status = nfs4_set_lock_state(state, request);
4489 /* Unlock _before_ we do the RPC call */ 4530 /* Unlock _before_ we do the RPC call */
4490 request->fl_flags |= FL_EXISTS; 4531 request->fl_flags |= FL_EXISTS;
4532 /* Exclude nfs_delegation_claim_locks() */
4533 mutex_lock(&sp->so_delegreturn_mutex);
4534 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
4491 down_read(&nfsi->rwsem); 4535 down_read(&nfsi->rwsem);
4492 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 4536 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
4493 up_read(&nfsi->rwsem); 4537 up_read(&nfsi->rwsem);
4538 mutex_unlock(&sp->so_delegreturn_mutex);
4494 goto out; 4539 goto out;
4495 } 4540 }
4496 up_read(&nfsi->rwsem); 4541 up_read(&nfsi->rwsem);
4542 mutex_unlock(&sp->so_delegreturn_mutex);
4497 if (status != 0) 4543 if (status != 0)
4498 goto out; 4544 goto out;
4499 /* Is this a delegated lock? */ 4545 /* Is this a delegated lock? */
@@ -4572,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4572 4618
4573 dprintk("%s: begin!\n", __func__); 4619 dprintk("%s: begin!\n", __func__);
4574 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) 4620 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
4575 return; 4621 goto out_wait;
4576 /* Do we need to do an open_to_lock_owner? */ 4622 /* Do we need to do an open_to_lock_owner? */
4577 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4623 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4578 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4624 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4592,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4592 nfs_release_seqid(data->arg.open_seqid); 4638 nfs_release_seqid(data->arg.open_seqid);
4593out_release_lock_seqid: 4639out_release_lock_seqid:
4594 nfs_release_seqid(data->arg.lock_seqid); 4640 nfs_release_seqid(data->arg.lock_seqid);
4641out_wait:
4642 nfs4_sequence_done(task, &data->res.seq_res);
4595 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4643 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
4596} 4644}
4597 4645
@@ -4809,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
4809 4857
4810static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4858static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4811{ 4859{
4860 struct nfs4_state_owner *sp = state->owner;
4812 struct nfs_inode *nfsi = NFS_I(state->inode); 4861 struct nfs_inode *nfsi = NFS_I(state->inode);
4813 unsigned char fl_flags = request->fl_flags; 4862 unsigned char fl_flags = request->fl_flags;
4863 unsigned int seq;
4814 int status = -ENOLCK; 4864 int status = -ENOLCK;
4815 4865
4816 if ((fl_flags & FL_POSIX) && 4866 if ((fl_flags & FL_POSIX) &&
@@ -4832,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4832 status = do_vfs_lock(request->fl_file, request); 4882 status = do_vfs_lock(request->fl_file, request);
4833 goto out_unlock; 4883 goto out_unlock;
4834 } 4884 }
4885 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
4886 up_read(&nfsi->rwsem);
4835 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); 4887 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4836 if (status != 0) 4888 if (status != 0)
4889 goto out;
4890 down_read(&nfsi->rwsem);
4891 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
4892 status = -NFS4ERR_DELAY;
4837 goto out_unlock; 4893 goto out_unlock;
4894 }
4838 /* Note: we always want to sleep here! */ 4895 /* Note: we always want to sleep here! */
4839 request->fl_flags = fl_flags | FL_SLEEP; 4896 request->fl_flags = fl_flags | FL_SLEEP;
4840 if (do_vfs_lock(request->fl_file, request) < 0) 4897 if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4941,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4941 case 0: 4998 case 0:
4942 case -ESTALE: 4999 case -ESTALE:
4943 goto out; 5000 goto out;
4944 case -NFS4ERR_EXPIRED:
4945 nfs4_schedule_stateid_recovery(server, state);
4946 case -NFS4ERR_STALE_CLIENTID: 5001 case -NFS4ERR_STALE_CLIENTID:
4947 case -NFS4ERR_STALE_STATEID: 5002 case -NFS4ERR_STALE_STATEID:
5003 set_bit(NFS_DELEGATED_STATE, &state->flags);
5004 case -NFS4ERR_EXPIRED:
4948 nfs4_schedule_lease_recovery(server->nfs_client); 5005 nfs4_schedule_lease_recovery(server->nfs_client);
5006 err = -EAGAIN;
4949 goto out; 5007 goto out;
4950 case -NFS4ERR_BADSESSION: 5008 case -NFS4ERR_BADSESSION:
4951 case -NFS4ERR_BADSLOT: 5009 case -NFS4ERR_BADSLOT:
4952 case -NFS4ERR_BAD_HIGH_SLOT: 5010 case -NFS4ERR_BAD_HIGH_SLOT:
4953 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 5011 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4954 case -NFS4ERR_DEADSESSION: 5012 case -NFS4ERR_DEADSESSION:
5013 set_bit(NFS_DELEGATED_STATE, &state->flags);
4955 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 5014 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5015 err = -EAGAIN;
4956 goto out; 5016 goto out;
4957 case -ERESTARTSYS:
4958 /*
4959 * The show must go on: exit, but mark the
4960 * stateid as needing recovery.
4961 */
4962 case -NFS4ERR_DELEG_REVOKED: 5017 case -NFS4ERR_DELEG_REVOKED:
4963 case -NFS4ERR_ADMIN_REVOKED: 5018 case -NFS4ERR_ADMIN_REVOKED:
4964 case -NFS4ERR_BAD_STATEID: 5019 case -NFS4ERR_BAD_STATEID:
@@ -4971,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4971 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 5026 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4972 err = 0; 5027 err = 0;
4973 goto out; 5028 goto out;
4974 case -NFS4ERR_DELAY:
4975 break;
4976 } 5029 }
5030 set_bit(NFS_DELEGATED_STATE, &state->flags);
4977 err = nfs4_handle_exception(server, err, &exception); 5031 err = nfs4_handle_exception(server, err, &exception);
4978 } while (exception.retry); 5032 } while (exception.retry);
4979out: 5033out:
@@ -6130,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6130 status = nfs4_wait_for_completion_rpc_task(task); 6184 status = nfs4_wait_for_completion_rpc_task(task);
6131 if (status == 0) 6185 if (status == 0)
6132 status = task->tk_status; 6186 status = task->tk_status;
6133 if (status == 0) 6187 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
6188 if (status == 0 && lgp->res.layoutp->len)
6134 lseg = pnfs_layout_process(lgp); 6189 lseg = pnfs_layout_process(lgp);
6135 rpc_put_task(task); 6190 rpc_put_task(task);
6136 dprintk("<-- %s status=%d\n", __func__, status); 6191 dprintk("<-- %s status=%d\n", __func__, status);
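
The nfs4proc.c changes above sample the owner's reclaim seqcount with raw_seqcount_begin() before an OPEN or LOCK goes on the wire and re-check it with read_seqcount_retry() afterwards: if state recovery ran in between, the freshly acquired stateid may already be stale, so the open path schedules stateid recovery and the lock path returns NFS4ERR_DELAY rather than trust it. A minimal sketch of that read-side check (the callback-based wrapper is illustrative only):

#include <linux/seqlock.h>
#include <linux/errno.h>

/* Sketch: run @op and report -EAGAIN if a writer (state reclaim) ran
 * concurrently, so the caller can recover and retry. */
static int example_run_with_reclaim_check(seqcount_t *sc,
                                          int (*op)(void *), void *arg)
{
        unsigned int seq = raw_seqcount_begin(sc);
        int ret = op(arg);

        if (ret == 0 && read_seqcount_retry(sc, seq))
                ret = -EAGAIN;
        return ret;
}
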
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
136 clp->cl_confirm = clid.confirm; 136 clp->cl_confirm = clid.confirm;
137 137
138 status = nfs40_walk_client_list(clp, result, cred); 138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) { 139 if (status == 0) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4 140 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */ 141 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result); 142 nfs4_schedule_state_renewal(*result);
146 break;
147 } 143 }
148
149out: 144out:
150 return status; 145 return status;
151} 146}
@@ -523,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
523 nfs4_init_seqid_counter(&sp->so_seqid); 518 nfs4_init_seqid_counter(&sp->so_seqid);
524 atomic_set(&sp->so_count, 1); 519 atomic_set(&sp->so_count, 1);
525 INIT_LIST_HEAD(&sp->so_lru); 520 INIT_LIST_HEAD(&sp->so_lru);
521 seqcount_init(&sp->so_reclaim_seqcount);
522 mutex_init(&sp->so_delegreturn_mutex);
526 return sp; 523 return sp;
527} 524}
528 525
@@ -1395,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1395 * recovering after a network partition or a reboot from a 1392 * recovering after a network partition or a reboot from a
1396 * server that doesn't support a grace period. 1393 * server that doesn't support a grace period.
1397 */ 1394 */
1398restart:
1399 spin_lock(&sp->so_lock); 1395 spin_lock(&sp->so_lock);
1396 write_seqcount_begin(&sp->so_reclaim_seqcount);
1397restart:
1400 list_for_each_entry(state, &sp->so_states, open_states) { 1398 list_for_each_entry(state, &sp->so_states, open_states) {
1401 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1402 continue; 1400 continue;
@@ -1417,6 +1415,7 @@ restart:
1417 } 1415 }
1418 spin_unlock(&state->state_lock); 1416 spin_unlock(&state->state_lock);
1419 nfs4_put_open_state(state); 1417 nfs4_put_open_state(state);
1418 spin_lock(&sp->so_lock);
1420 goto restart; 1419 goto restart;
1421 } 1420 }
1422 } 1421 }
@@ -1454,12 +1453,17 @@ restart:
1454 goto out_err; 1453 goto out_err;
1455 } 1454 }
1456 nfs4_put_open_state(state); 1455 nfs4_put_open_state(state);
1456 spin_lock(&sp->so_lock);
1457 goto restart; 1457 goto restart;
1458 } 1458 }
1459 write_seqcount_end(&sp->so_reclaim_seqcount);
1459 spin_unlock(&sp->so_lock); 1460 spin_unlock(&sp->so_lock);
1460 return 0; 1461 return 0;
1461out_err: 1462out_err:
1462 nfs4_put_open_state(state); 1463 nfs4_put_open_state(state);
1464 spin_lock(&sp->so_lock);
1465 write_seqcount_end(&sp->so_reclaim_seqcount);
1466 spin_unlock(&sp->so_lock);
1463 return status; 1467 return status;
1464} 1468}
1465 1469
@@ -1863,6 +1867,7 @@ again:
1863 case -ETIMEDOUT: 1867 case -ETIMEDOUT:
1864 case -EAGAIN: 1868 case -EAGAIN:
1865 ssleep(1); 1869 ssleep(1);
1870 case -NFS4ERR_STALE_CLIENTID:
1866 dprintk("NFS: %s after status %d, retrying\n", 1871 dprintk("NFS: %s after status %d, retrying\n",
1867 __func__, status); 1872 __func__, status);
1868 goto again; 1873 goto again;
@@ -2022,8 +2027,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
2022 nfs4_begin_drain_session(clp); 2027 nfs4_begin_drain_session(clp);
2023 cred = nfs4_get_exchange_id_cred(clp); 2028 cred = nfs4_get_exchange_id_cred(clp);
2024 status = nfs4_proc_destroy_session(clp->cl_session, cred); 2029 status = nfs4_proc_destroy_session(clp->cl_session, cred);
2025 if (status && status != -NFS4ERR_BADSESSION && 2030 switch (status) {
2026 status != -NFS4ERR_DEADSESSION) { 2031 case 0:
2032 case -NFS4ERR_BADSESSION:
2033 case -NFS4ERR_DEADSESSION:
2034 break;
2035 case -NFS4ERR_BACK_CHAN_BUSY:
2036 case -NFS4ERR_DELAY:
2037 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2038 status = 0;
2039 ssleep(1);
2040 goto out;
2041 default:
2027 status = nfs4_recovery_handle_error(clp, status); 2042 status = nfs4_recovery_handle_error(clp, status);
2028 goto out; 2043 goto out;
2029 } 2044 }
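
On the write side, nfs4_reclaim_open_state() in nfs4state.c now brackets its reclaim loop with write_seqcount_begin()/write_seqcount_end() under so_lock and re-takes so_lock before every restart so the count stays balanced on the error path as well. A minimal sketch of the writer bracket, assuming a caller-owned lock and seqcount and ignoring the restart logic of the real function:

#include <linux/seqlock.h>
#include <linux/spinlock.h>

/* Sketch: while the seqcount is odd, readers using the check from the
 * earlier sketch will retry or recover. */
static void example_reclaim(spinlock_t *lock, seqcount_t *sc,
                            void (*do_reclaim)(void))
{
        spin_lock(lock);
        write_seqcount_begin(sc);

        do_reclaim();           /* reopen/relock state */

        write_seqcount_end(sc);
        spin_unlock(lock);
}
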
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 84d2e9e2f313..569b166cc050 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -28,7 +28,7 @@ static struct file_system_type nfs4_remote_fs_type = {
28 .name = "nfs4", 28 .name = "nfs4",
29 .mount = nfs4_remote_mount, 29 .mount = nfs4_remote_mount,
30 .kill_sb = nfs_kill_super, 30 .kill_sb = nfs_kill_super,
31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
32}; 32};
33 33
34static struct file_system_type nfs4_remote_referral_fs_type = { 34static struct file_system_type nfs4_remote_referral_fs_type = {
@@ -36,7 +36,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
36 .name = "nfs4", 36 .name = "nfs4",
37 .mount = nfs4_remote_referral_mount, 37 .mount = nfs4_remote_referral_mount,
38 .kill_sb = nfs_kill_super, 38 .kill_sb = nfs_kill_super,
39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
40}; 40};
41 41
42struct file_system_type nfs4_referral_fs_type = { 42struct file_system_type nfs4_referral_fs_type = {
@@ -44,7 +44,7 @@ struct file_system_type nfs4_referral_fs_type = {
44 .name = "nfs4", 44 .name = "nfs4",
45 .mount = nfs4_referral_mount, 45 .mount = nfs4_referral_mount,
46 .kill_sb = nfs_kill_super, 46 .kill_sb = nfs_kill_super,
47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
48}; 48};
49 49
50static const struct super_operations nfs4_sops = { 50static const struct super_operations nfs4_sops = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 26b143920433..e3edda554ac7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1002,7 +1002,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); 1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
1003 if (owner_namelen < 0) { 1003 if (owner_namelen < 0) {
1004 dprintk("nfs: couldn't resolve uid %d to string\n", 1004 dprintk("nfs: couldn't resolve uid %d to string\n",
1005 iap->ia_uid); 1005 from_kuid(&init_user_ns, iap->ia_uid));
1006 /* XXX */ 1006 /* XXX */
1007 strcpy(owner_name, "nobody"); 1007 strcpy(owner_name, "nobody");
1008 owner_namelen = sizeof("nobody") - 1; 1008 owner_namelen = sizeof("nobody") - 1;
@@ -1014,7 +1014,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); 1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
1015 if (owner_grouplen < 0) { 1015 if (owner_grouplen < 0) {
1016 dprintk("nfs: couldn't resolve gid %d to string\n", 1016 dprintk("nfs: couldn't resolve gid %d to string\n",
1017 iap->ia_gid); 1017 from_kgid(&init_user_ns, iap->ia_gid));
1018 strcpy(owner_group, "nobody"); 1018 strcpy(owner_group, "nobody");
1019 owner_grouplen = sizeof("nobody") - 1; 1019 owner_grouplen = sizeof("nobody") - 1;
1020 /* goto out; */ 1020 /* goto out; */
@@ -3778,14 +3778,14 @@ out_overflow:
3778} 3778}
3779 3779
3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3781 const struct nfs_server *server, uint32_t *uid, 3781 const struct nfs_server *server, kuid_t *uid,
3782 struct nfs4_string *owner_name) 3782 struct nfs4_string *owner_name)
3783{ 3783{
3784 uint32_t len; 3784 uint32_t len;
3785 __be32 *p; 3785 __be32 *p;
3786 int ret = 0; 3786 int ret = 0;
3787 3787
3788 *uid = -2; 3788 *uid = make_kuid(&init_user_ns, -2);
3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3790 return -EIO; 3790 return -EIO;
3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
@@ -3813,7 +3813,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3813 __func__, len); 3813 __func__, len);
3814 bitmap[1] &= ~FATTR4_WORD1_OWNER; 3814 bitmap[1] &= ~FATTR4_WORD1_OWNER;
3815 } 3815 }
3816 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3816 dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
3817 return ret; 3817 return ret;
3818out_overflow: 3818out_overflow:
3819 print_overflow_msg(__func__, xdr); 3819 print_overflow_msg(__func__, xdr);
@@ -3821,14 +3821,14 @@ out_overflow:
3821} 3821}
3822 3822
3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3824 const struct nfs_server *server, uint32_t *gid, 3824 const struct nfs_server *server, kgid_t *gid,
3825 struct nfs4_string *group_name) 3825 struct nfs4_string *group_name)
3826{ 3826{
3827 uint32_t len; 3827 uint32_t len;
3828 __be32 *p; 3828 __be32 *p;
3829 int ret = 0; 3829 int ret = 0;
3830 3830
3831 *gid = -2; 3831 *gid = make_kgid(&init_user_ns, -2);
3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3833 return -EIO; 3833 return -EIO;
3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
@@ -3856,7 +3856,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3856 __func__, len); 3856 __func__, len);
3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
3858 } 3858 }
3859 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3859 dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
3860 return ret; 3860 return ret;
3861out_overflow: 3861out_overflow:
3862 print_overflow_msg(__func__, xdr); 3862 print_overflow_msg(__func__, xdr);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
647 .flags = PNFS_LAYOUTRET_ON_SETATTR | 647 .flags = PNFS_LAYOUTRET_ON_SETATTR |
648 PNFS_LAYOUTRET_ON_ERROR, 648 PNFS_LAYOUTRET_ON_ERROR,
649 649
650 .owner = THIS_MODULE,
650 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 651 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
651 .free_layout_hdr = objlayout_free_layout_hdr, 652 .free_layout_hdr = objlayout_free_layout_hdr,
652 653
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e7165d915362..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -254,7 +254,7 @@ static void
254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
255{ 255{
256 lo->plh_retry_timestamp = jiffies; 256 lo->plh_retry_timestamp = jiffies;
257 if (test_and_set_bit(fail_bit, &lo->plh_flags)) 257 if (!test_and_set_bit(fail_bit, &lo->plh_flags))
258 atomic_inc(&lo->plh_refcount); 258 atomic_inc(&lo->plh_refcount);
259} 259}
260 260
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manager to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
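
The pnfs.c rework above replaces the single splice of every server's layout list with a two-phase bulk destroy: under clp->cl_lock and RCU each inode is pinned with igrab() and its header queued on a private plh_bulk_destroy list; whenever the scan has to drop the locks (to iput() an inode it could not queue) it returns -EAGAIN and the caller restarts from the top, since the list may have changed in the meantime. A minimal sketch of that drop-locks-and-restart idiom over a generic list (types and names are illustrative):

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

struct example_item {
        struct list_head        link;
        bool                    needs_sleepable_cleanup;
};

/* Sketch: detach items from @src under @lock; when an item needs work
 * that may sleep, drop the lock, do it, retake the lock and ask the
 * caller to restart the walk. */
static int example_scan_locked(spinlock_t *lock, struct list_head *src)
{
        struct example_item *it, *next;

        list_for_each_entry_safe(it, next, src, link) {
                list_del_init(&it->link);
                if (!it->needs_sleepable_cleanup)
                        continue;
                spin_unlock(lock);
                /* sleepable cleanup (e.g. iput()) would run here */
                spin_lock(lock);
                return -EAGAIN;
        }
        return 0;
}

static void example_scan(spinlock_t *lock, struct list_head *src)
{
        spin_lock(lock);
        while (example_scan_locked(lock, src) == -EAGAIN)
                ;               /* restart from the head after each lock drop */
        spin_unlock(lock);
}
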
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index d35b62e83ea6..6da209bd9408 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
77 long hash) 77 long hash)
78{ 78{
79 struct nfs4_deviceid_node *d; 79 struct nfs4_deviceid_node *d;
80 struct hlist_node *n;
81 80
82 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 81 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
83 if (d->ld == ld && d->nfs_client == clp && 82 if (d->ld == ld && d->nfs_client == clp &&
84 !memcmp(&d->deviceid, id, sizeof(*id))) { 83 !memcmp(&d->deviceid, id, sizeof(*id))) {
85 if (atomic_read(&d->ref)) 84 if (atomic_read(&d->ref))
@@ -248,12 +247,11 @@ static void
248_deviceid_purge_client(const struct nfs_client *clp, long hash) 247_deviceid_purge_client(const struct nfs_client *clp, long hash)
249{ 248{
250 struct nfs4_deviceid_node *d; 249 struct nfs4_deviceid_node *d;
251 struct hlist_node *n;
252 HLIST_HEAD(tmp); 250 HLIST_HEAD(tmp);
253 251
254 spin_lock(&nfs4_deviceid_lock); 252 spin_lock(&nfs4_deviceid_lock);
255 rcu_read_lock(); 253 rcu_read_lock();
256 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 254 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
257 if (d->nfs_client == clp && atomic_read(&d->ref)) { 255 if (d->nfs_client == clp && atomic_read(&d->ref)) {
258 hlist_del_init_rcu(&d->node); 256 hlist_del_init_rcu(&d->node);
259 hlist_add_head(&d->tmpnode, &tmp); 257 hlist_add_head(&d->tmpnode, &tmp);
@@ -291,12 +289,11 @@ void
291nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) 289nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
292{ 290{
293 struct nfs4_deviceid_node *d; 291 struct nfs4_deviceid_node *d;
294 struct hlist_node *n;
295 int i; 292 int i;
296 293
297 rcu_read_lock(); 294 rcu_read_lock();
298 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ 295 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
299 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) 296 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
300 if (d->nfs_client == clp) 297 if (d->nfs_client == clp)
301 set_bit(NFS_DEVICEID_INVALID, &d->flags); 298 set_bit(NFS_DEVICEID_INVALID, &d->flags);
302 } 299 }
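
pnfs_dev.c is adapted to the three-argument hlist_for_each_entry_rcu(): the iterator now derives its cursor from the entry type itself, so the separate struct hlist_node * variable disappears from all three walkers. A minimal sketch of the new form over an RCU-protected hash bucket (node type and lookup key are illustrative):

#include <linux/rculist.h>

struct example_node {
        int                     id;
        struct hlist_node       node;
};

/* Sketch: look up @id in one hash bucket; the caller must be inside an
 * RCU read-side critical section. */
static struct example_node *example_lookup(struct hlist_head *bucket, int id)
{
        struct example_node *n;

        hlist_for_each_entry_rcu(n, bucket, node)
                if (n->id == id)
                        return n;
        return NULL;
}
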
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f084dac948e1..fc8de9016acf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -662,7 +662,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
662static int 662static int
663nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) 663nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
664{ 664{
665 struct inode *inode = filp->f_path.dentry->d_inode; 665 struct inode *inode = file_inode(filp);
666 666
667 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 667 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
668} 668}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b6bdb18e892c..a5e5d9899d56 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
91 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
92 if (rdata->pages.pagevec != rdata->pages.page_array) 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec); 93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data) 94 if (rdata == &read_header->rpc_data) {
95 kfree(rdata);
96 else
97 rdata->header = NULL; 95 rdata->header = NULL;
96 rdata = NULL;
97 }
98 if (atomic_dec_and_test(&hdr->refcnt)) 98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr); 99 hdr->completion_ops->completion(hdr);
100 /* Note: we only free the rpc_task after callbacks are done.
101 * See the comment in rpc_free_task() for why
102 */
103 kfree(rdata);
100} 104}
101EXPORT_SYMBOL_GPL(nfs_readdata_release); 105EXPORT_SYMBOL_GPL(nfs_readdata_release);
102 106
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3250b41eb562..17b32b722457 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -55,7 +55,6 @@
55#include <linux/parser.h> 55#include <linux/parser.h>
56#include <linux/nsproxy.h> 56#include <linux/nsproxy.h>
57#include <linux/rcupdate.h> 57#include <linux/rcupdate.h>
58#include <linux/kthread.h>
59 58
60#include <asm/uaccess.h> 59#include <asm/uaccess.h>
61 60
@@ -293,7 +292,7 @@ struct file_system_type nfs_fs_type = {
293 .name = "nfs", 292 .name = "nfs",
294 .mount = nfs_fs_mount, 293 .mount = nfs_fs_mount,
295 .kill_sb = nfs_kill_super, 294 .kill_sb = nfs_kill_super,
296 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
297}; 296};
298EXPORT_SYMBOL_GPL(nfs_fs_type); 297EXPORT_SYMBOL_GPL(nfs_fs_type);
299 298
@@ -302,7 +301,7 @@ struct file_system_type nfs_xdev_fs_type = {
302 .name = "nfs", 301 .name = "nfs",
303 .mount = nfs_xdev_mount, 302 .mount = nfs_xdev_mount,
304 .kill_sb = nfs_kill_super, 303 .kill_sb = nfs_kill_super,
305 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 304 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
306}; 305};
307 306
308const struct super_operations nfs_sops = { 307const struct super_operations nfs_sops = {
@@ -332,7 +331,7 @@ struct file_system_type nfs4_fs_type = {
332 .name = "nfs4", 331 .name = "nfs4",
333 .mount = nfs_fs_mount, 332 .mount = nfs_fs_mount,
334 .kill_sb = nfs_kill_super, 333 .kill_sb = nfs_kill_super,
335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 334 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
336}; 335};
337EXPORT_SYMBOL_GPL(nfs4_fs_type); 336EXPORT_SYMBOL_GPL(nfs4_fs_type);
338 337
@@ -419,54 +418,6 @@ void nfs_sb_deactive(struct super_block *sb)
419} 418}
420EXPORT_SYMBOL_GPL(nfs_sb_deactive); 419EXPORT_SYMBOL_GPL(nfs_sb_deactive);
421 420
422static int nfs_deactivate_super_async_work(void *ptr)
423{
424 struct super_block *sb = ptr;
425
426 deactivate_super(sb);
427 module_put_and_exit(0);
428 return 0;
429}
430
431/*
432 * same effect as deactivate_super, but will do final unmount in kthread
433 * context
434 */
435static void nfs_deactivate_super_async(struct super_block *sb)
436{
437 struct task_struct *task;
438 char buf[INET6_ADDRSTRLEN + 1];
439 struct nfs_server *server = NFS_SB(sb);
440 struct nfs_client *clp = server->nfs_client;
441
442 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
443 rcu_read_lock();
444 snprintf(buf, sizeof(buf),
445 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
446 rcu_read_unlock();
447
448 __module_get(THIS_MODULE);
449 task = kthread_run(nfs_deactivate_super_async_work, sb,
450 "%s-deactivate-super", buf);
451 if (IS_ERR(task)) {
452 pr_err("%s: kthread_run: %ld\n",
453 __func__, PTR_ERR(task));
454 /* make synchronous call and hope for the best */
455 deactivate_super(sb);
456 module_put(THIS_MODULE);
457 }
458 }
459}
460
461void nfs_sb_deactive_async(struct super_block *sb)
462{
463 struct nfs_server *server = NFS_SB(sb);
464
465 if (atomic_dec_and_test(&server->active))
466 nfs_deactivate_super_async(sb);
467}
468EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
469
470/* 421/*
471 * Deliver file system statistics to userspace 422 * Deliver file system statistics to userspace
472 */ 423 */
@@ -1153,7 +1104,7 @@ static int nfs_get_option_str(substring_t args[], char **option)
1153{ 1104{
1154 kfree(*option); 1105 kfree(*option);
1155 *option = match_strdup(args); 1106 *option = match_strdup(args);
1156 return !option; 1107 return !*option;
1157} 1108}
1158 1109
1159static int nfs_get_option_ul(substring_t args[], unsigned long *option) 1110static int nfs_get_option_ul(substring_t args[], unsigned long *option)
@@ -2590,27 +2541,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2590 struct nfs_server *server; 2541 struct nfs_server *server;
2591 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2542 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2592 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; 2543 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
2593 int error;
2594 2544
2595 dprintk("--> nfs_xdev_mount_common()\n"); 2545 dprintk("--> nfs_xdev_mount()\n");
2596 2546
2597 mount_info.mntfh = mount_info.cloned->fh; 2547 mount_info.mntfh = mount_info.cloned->fh;
2598 2548
2599 /* create a new volume representation */ 2549 /* create a new volume representation */
2600 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2550 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2601 if (IS_ERR(server)) {
2602 error = PTR_ERR(server);
2603 goto out_err;
2604 }
2605 2551
2606 mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); 2552 if (IS_ERR(server))
2607 dprintk("<-- nfs_xdev_mount_common() = 0\n"); 2553 mntroot = ERR_CAST(server);
2608out: 2554 else
2609 return mntroot; 2555 mntroot = nfs_fs_mount_common(server, flags,
2556 dev_name, &mount_info, nfs_mod);
2610 2557
2611out_err: 2558 dprintk("<-- nfs_xdev_mount() = %ld\n",
2612 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); 2559 IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
2613 goto out; 2560 return mntroot;
2614} 2561}
2615 2562
2616#if IS_ENABLED(CONFIG_NFS_V4) 2563#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive_async(sb); 98 nfs_sb_deactive(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
268 * point dentry is definitely not a root, so we won't need 268 * point dentry is definitely not a root, so we won't need
269 * that anymore. 269 * that anymore.
270 */ 270 */
271 if (devname_garbage) 271 kfree(devname_garbage);
272 kfree(devname_garbage);
273 return 0; 272 return 0;
274out_unlock: 273out_unlock:
275 spin_unlock(&dentry->d_lock); 274 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b673be31590e..c483cc50b82e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
126 put_nfs_open_context(wdata->args.context); 126 put_nfs_open_context(wdata->args.context);
127 if (wdata->pages.pagevec != wdata->pages.page_array) 127 if (wdata->pages.pagevec != wdata->pages.page_array)
128 kfree(wdata->pages.pagevec); 128 kfree(wdata->pages.pagevec);
129 if (wdata != &write_header->rpc_data) 129 if (wdata == &write_header->rpc_data) {
130 kfree(wdata);
131 else
132 wdata->header = NULL; 130 wdata->header = NULL;
131 wdata = NULL;
132 }
133 if (atomic_dec_and_test(&hdr->refcnt)) 133 if (atomic_dec_and_test(&hdr->refcnt))
134 hdr->completion_ops->completion(hdr); 134 hdr->completion_ops->completion(hdr);
135 /* Note: we only free the rpc_task after callbacks are done.
136 * See the comment in rpc_free_task() for why
137 */
138 kfree(wdata);
135} 139}
136EXPORT_SYMBOL_GPL(nfs_writedata_release); 140EXPORT_SYMBOL_GPL(nfs_writedata_release);
137 141
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 6940439bd609..ed628f71274c 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -38,8 +38,8 @@ struct nfsacl_encode_desc {
38 unsigned int count; 38 unsigned int count;
39 struct posix_acl *acl; 39 struct posix_acl *acl;
40 int typeflag; 40 int typeflag;
41 uid_t uid; 41 kuid_t uid;
42 gid_t gid; 42 kgid_t gid;
43}; 43};
44 44
45struct nfsacl_simple_acl { 45struct nfsacl_simple_acl {
@@ -60,14 +60,16 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); 60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
61 switch(entry->e_tag) { 61 switch(entry->e_tag) {
62 case ACL_USER_OBJ: 62 case ACL_USER_OBJ:
63 *p++ = htonl(nfsacl_desc->uid); 63 *p++ = htonl(from_kuid(&init_user_ns, nfsacl_desc->uid));
64 break; 64 break;
65 case ACL_GROUP_OBJ: 65 case ACL_GROUP_OBJ:
66 *p++ = htonl(nfsacl_desc->gid); 66 *p++ = htonl(from_kgid(&init_user_ns, nfsacl_desc->gid));
67 break; 67 break;
68 case ACL_USER: 68 case ACL_USER:
69 *p++ = htonl(from_kuid(&init_user_ns, entry->e_uid));
70 break;
69 case ACL_GROUP: 71 case ACL_GROUP:
70 *p++ = htonl(entry->e_id); 72 *p++ = htonl(from_kgid(&init_user_ns, entry->e_gid));
71 break; 73 break;
72 default: /* Solaris depends on that! */ 74 default: /* Solaris depends on that! */
73 *p++ = 0; 75 *p++ = 0;
@@ -148,6 +150,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
148 (struct nfsacl_decode_desc *) desc; 150 (struct nfsacl_decode_desc *) desc;
149 __be32 *p = elem; 151 __be32 *p = elem;
150 struct posix_acl_entry *entry; 152 struct posix_acl_entry *entry;
153 unsigned int id;
151 154
152 if (!nfsacl_desc->acl) { 155 if (!nfsacl_desc->acl) {
153 if (desc->array_len > NFS_ACL_MAX_ENTRIES) 156 if (desc->array_len > NFS_ACL_MAX_ENTRIES)
@@ -160,14 +163,22 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
160 163
161 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; 164 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
162 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; 165 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
163 entry->e_id = ntohl(*p++); 166 id = ntohl(*p++);
164 entry->e_perm = ntohl(*p++); 167 entry->e_perm = ntohl(*p++);
165 168
166 switch(entry->e_tag) { 169 switch(entry->e_tag) {
167 case ACL_USER_OBJ:
168 case ACL_USER: 170 case ACL_USER:
169 case ACL_GROUP_OBJ: 171 entry->e_uid = make_kuid(&init_user_ns, id);
172 if (!uid_valid(entry->e_uid))
173 return -EINVAL;
174 break;
170 case ACL_GROUP: 175 case ACL_GROUP:
176 entry->e_gid = make_kgid(&init_user_ns, id);
177 if (!gid_valid(entry->e_gid))
178 return -EINVAL;
179 break;
180 case ACL_USER_OBJ:
181 case ACL_GROUP_OBJ:
171 case ACL_OTHER: 182 case ACL_OTHER:
172 if (entry->e_perm & ~S_IRWXO) 183 if (entry->e_perm & ~S_IRWXO)
173 return -EINVAL; 184 return -EINVAL;
@@ -190,9 +201,13 @@ cmp_acl_entry(const void *x, const void *y)
190 201
191 if (a->e_tag != b->e_tag) 202 if (a->e_tag != b->e_tag)
192 return a->e_tag - b->e_tag; 203 return a->e_tag - b->e_tag;
193 else if (a->e_id > b->e_id) 204 else if ((a->e_tag == ACL_USER) && uid_gt(a->e_uid, b->e_uid))
205 return 1;
206 else if ((a->e_tag == ACL_USER) && uid_lt(a->e_uid, b->e_uid))
207 return -1;
208 else if ((a->e_tag == ACL_GROUP) && gid_gt(a->e_gid, b->e_gid))
194 return 1; 209 return 1;
195 else if (a->e_id < b->e_id) 210 else if ((a->e_tag == ACL_GROUP) && gid_lt(a->e_gid, b->e_gid))
196 return -1; 211 return -1;
197 else 212 else
198 return 0; 213 return 0;
@@ -213,22 +228,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
213 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), 228 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
214 cmp_acl_entry, NULL); 229 cmp_acl_entry, NULL);
215 230
216 /* Clear undefined identifier fields and find the ACL_GROUP_OBJ 231 /* Find the ACL_GROUP_OBJ and ACL_MASK entries. */
217 and ACL_MASK entries. */
218 FOREACH_ACL_ENTRY(pa, acl, pe) { 232 FOREACH_ACL_ENTRY(pa, acl, pe) {
219 switch(pa->e_tag) { 233 switch(pa->e_tag) {
220 case ACL_USER_OBJ: 234 case ACL_USER_OBJ:
221 pa->e_id = ACL_UNDEFINED_ID;
222 break; 235 break;
223 case ACL_GROUP_OBJ: 236 case ACL_GROUP_OBJ:
224 pa->e_id = ACL_UNDEFINED_ID;
225 group_obj = pa; 237 group_obj = pa;
226 break; 238 break;
227 case ACL_MASK: 239 case ACL_MASK:
228 mask = pa; 240 mask = pa;
229 /* fall through */ 241 /* fall through */
230 case ACL_OTHER: 242 case ACL_OTHER:
231 pa->e_id = ACL_UNDEFINED_ID;
232 break; 243 break;
233 } 244 }
234 } 245 }
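
Most of the nfsacl and nfsd hunks above replace raw uid_t/gid_t numbers with the namespace-aware kuid_t/kgid_t types, converting at the XDR boundary. A minimal sketch of that round trip, assuming the init_user_ns mapping the NFS code uses; the helper names here are illustrative only:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

static int decode_wire_uid(u32 wire_uid, kuid_t *out)
{
	kuid_t kuid = make_kuid(&init_user_ns, wire_uid);

	if (!uid_valid(kuid))	/* no mapping: reject, as the decoders above do */
		return -EINVAL;
	*out = kuid;
	return 0;
}

static u32 encode_wire_uid(kuid_t kuid)
{
	/* translate back to a plain number for the XDR encoder */
	return from_kuid(&init_user_ns, kuid);
}

/* Comparisons use uid_eq()/gid_eq() rather than ==, since kuid_t/kgid_t
 * may be wrapper structs; that is why the hunks above rewrite checks such
 * as "new->fsuid == (uid_t)-1" as uid_eq(new->fsuid, INVALID_UID). */
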
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 8df1ea4a6ff9..430b6872806f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -65,8 +65,8 @@ config NFSD_V3_ACL
65 If unsure, say N. 65 If unsure, say N.
66 66
67config NFSD_V4 67config NFSD_V4
68 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" 68 bool "NFS server support for NFS version 4"
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 34e5c40af5ef..8b186a4955cc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -44,8 +44,6 @@
44struct nfs4_acl *nfs4_acl_new(int); 44struct nfs4_acl *nfs4_acl_new(int);
45int nfs4_acl_get_whotype(char *, u32); 45int nfs4_acl_get_whotype(char *, u32);
46int nfs4_acl_write_who(int who, char *p); 46int nfs4_acl_write_who(int who, char *p);
47int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
48 uid_t who, u32 mask);
49 47
50#define NFS4_ACL_TYPE_DEFAULT 0x01 48#define NFS4_ACL_TYPE_DEFAULT 0x01
51#define NFS4_ACL_DIR 0x02 49#define NFS4_ACL_DIR 0x02
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 34a10d78b839..06cddd572264 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -47,9 +47,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
47 if (!gi) 47 if (!gi)
48 goto oom; 48 goto oom;
49 } else if (flags & NFSEXP_ROOTSQUASH) { 49 } else if (flags & NFSEXP_ROOTSQUASH) {
50 if (!new->fsuid) 50 if (uid_eq(new->fsuid, GLOBAL_ROOT_UID))
51 new->fsuid = exp->ex_anon_uid; 51 new->fsuid = exp->ex_anon_uid;
52 if (!new->fsgid) 52 if (gid_eq(new->fsgid, GLOBAL_ROOT_GID))
53 new->fsgid = exp->ex_anon_gid; 53 new->fsgid = exp->ex_anon_gid;
54 54
55 gi = groups_alloc(rqgi->ngroups); 55 gi = groups_alloc(rqgi->ngroups);
@@ -58,7 +58,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
58 58
59 for (i = 0; i < rqgi->ngroups; i++) { 59 for (i = 0; i < rqgi->ngroups; i++) {
60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) 60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
61 GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid); 61 GROUP_AT(gi, i) = exp->ex_anon_gid;
62 else 62 else
63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i); 63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
64 } 64 }
@@ -66,9 +66,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
66 gi = get_group_info(rqgi); 66 gi = get_group_info(rqgi);
67 } 67 }
68 68
69 if (new->fsuid == (uid_t) -1) 69 if (uid_eq(new->fsuid, INVALID_UID))
70 new->fsuid = exp->ex_anon_uid; 70 new->fsuid = exp->ex_anon_uid;
71 if (new->fsgid == (gid_t) -1) 71 if (gid_eq(new->fsgid, INVALID_GID))
72 new->fsgid = exp->ex_anon_gid; 72 new->fsgid = exp->ex_anon_gid;
73 73
74 ret = set_groups(new, gi); 74 ret = set_groups(new, gi);
@@ -76,7 +76,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
76 if (ret < 0) 76 if (ret < 0)
77 goto error; 77 goto error;
78 78
79 if (new->fsuid) 79 if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID))
80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective); 80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
81 else 81 else
82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
index 78b3c0e93822..53325a12ba62 100644
--- a/fs/nfsd/auth.h
+++ b/fs/nfsd/auth.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * nfsd-specific authentication stuff. 2 * nfsd-specific authentication stuff.
3 * uid/gid mapping not yet implemented.
4 * 3 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */ 5 */
@@ -8,11 +7,6 @@
8#ifndef LINUX_NFSD_AUTH_H 7#ifndef LINUX_NFSD_AUTH_H
9#define LINUX_NFSD_AUTH_H 8#define LINUX_NFSD_AUTH_H
10 9
11#define nfsd_luid(rq, uid) ((u32)(uid))
12#define nfsd_lgid(rq, gid) ((u32)(gid))
13#define nfsd_ruid(rq, uid) ((u32)(uid))
14#define nfsd_rgid(rq, gid) ((u32)(gid))
15
16/* 10/*
17 * Set the current process's fsuid/fsgid etc to those of the NFS 11 * Set the current process's fsuid/fsgid etc to those of the NFS
18 * client user 12 * client user
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 15ebf91982b0..5f38ea36e266 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -535,13 +535,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
535 err = get_int(&mesg, &an_int); 535 err = get_int(&mesg, &an_int);
536 if (err) 536 if (err)
537 goto out3; 537 goto out3;
538 exp.ex_anon_uid= an_int; 538 exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
539 if (!uid_valid(exp.ex_anon_uid))
540 goto out3;
539 541
540 /* anon gid */ 542 /* anon gid */
541 err = get_int(&mesg, &an_int); 543 err = get_int(&mesg, &an_int);
542 if (err) 544 if (err)
543 goto out3; 545 goto out3;
544 exp.ex_anon_gid= an_int; 546 exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
547 if (!gid_valid(exp.ex_anon_gid))
548 goto out3;
545 549
546 /* fsid */ 550 /* fsid */
547 err = get_int(&mesg, &an_int); 551 err = get_int(&mesg, &an_int);
@@ -604,7 +608,7 @@ out:
604} 608}
605 609
606static void exp_flags(struct seq_file *m, int flag, int fsid, 610static void exp_flags(struct seq_file *m, int flag, int fsid,
607 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); 611 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
608static void show_secinfo(struct seq_file *m, struct svc_export *exp); 612static void show_secinfo(struct seq_file *m, struct svc_export *exp);
609 613
610static int svc_export_show(struct seq_file *m, 614static int svc_export_show(struct seq_file *m,
@@ -1171,15 +1175,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1171} 1175}
1172 1176
1173static void exp_flags(struct seq_file *m, int flag, int fsid, 1177static void exp_flags(struct seq_file *m, int flag, int fsid,
1174 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) 1178 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
1175{ 1179{
1176 show_expflags(m, flag, NFSEXP_ALLFLAGS); 1180 show_expflags(m, flag, NFSEXP_ALLFLAGS);
1177 if (flag & NFSEXP_FSID) 1181 if (flag & NFSEXP_FSID)
1178 seq_printf(m, ",fsid=%d", fsid); 1182 seq_printf(m, ",fsid=%d", fsid);
1179 if (anonu != (uid_t)-2 && anonu != (0x10000-2)) 1183 if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
1180 seq_printf(m, ",anonuid=%u", anonu); 1184 !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
1181 if (anong != (gid_t)-2 && anong != (0x10000-2)) 1185 seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
1182 seq_printf(m, ",anongid=%u", anong); 1186 if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
1187 !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
1188 seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
1183 if (fsloc && fsloc->locations_count > 0) { 1189 if (fsloc && fsloc->locations_count > 0) {
1184 char *loctype = (fsloc->migrated) ? "refer" : "replicas"; 1190 char *loctype = (fsloc->migrated) ? "refer" : "replicas";
1185 int i; 1191 int i;
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 247c00ccdb0f..d620e7f81429 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -101,7 +101,7 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
101 loff_t pos = *ppos; 101 loff_t pos = *ppos;
102 102
103 if (!pos) 103 if (!pos)
104 nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); 104 nfsd_inject_get(file_inode(file)->i_private, &val);
105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); 105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
106 106
107 if (pos < 0) 107 if (pos < 0)
@@ -133,10 +133,10 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
133 133
134 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); 134 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
135 if (size > 0) 135 if (size > 0)
136 nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); 136 nfsd_inject_set_client(file_inode(file)->i_private, &sa, size);
137 else { 137 else {
138 val = simple_strtoll(write_buf, NULL, 0); 138 val = simple_strtoll(write_buf, NULL, 0);
139 nfsd_inject_set(file->f_dentry->d_inode->i_private, val); 139 nfsd_inject_set(file_inode(file)->i_private, val);
140 } 140 }
141 return len; /* on success, claim we got the whole input */ 141 return len; /* on success, claim we got the whole input */
142} 142}
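
Several hunks (nfs/proc.c, nfsd/fault_inject.c, nfsd/nfsctl.c) switch from open-coded dentry dereferences to the file_inode() accessor. A short sketch, assuming the 3.9+ helper declared in <linux/fs.h>; the wrapper function is hypothetical:

#include <linux/fs.h>

static void *get_file_private(struct file *file)
{
	/* Previously spelled file->f_path.dentry->d_inode->i_private
	 * (or file->f_dentry->d_inode->i_private). */
	return file_inode(file)->i_private;
}
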
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index 9d513efc01ba..bf95f6b817a4 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -54,9 +54,9 @@ static inline void nfsd_idmap_shutdown(struct net *net)
54} 54}
55#endif 55#endif
56 56
57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); 57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); 58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
59int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); 59int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *);
60int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); 60int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *);
61 61
62#endif /* LINUX_NFSD_IDMAP_H */ 62#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 9170861c804a..95d76dc6c5da 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -45,6 +45,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
45 RETURN_STATUS(nfserr_inval); 45 RETURN_STATUS(nfserr_inval);
46 resp->mask = argp->mask; 46 resp->mask = argp->mask;
47 47
48 nfserr = fh_getattr(fh, &resp->stat);
49 if (nfserr)
50 goto fail;
51
48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 52 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
49 acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); 53 acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
50 if (IS_ERR(acl)) { 54 if (IS_ERR(acl)) {
@@ -115,6 +119,9 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
115 nfserr = nfserrno( nfsd_set_posix_acl( 119 nfserr = nfserrno( nfsd_set_posix_acl(
116 fh, ACL_TYPE_DEFAULT, argp->acl_default) ); 120 fh, ACL_TYPE_DEFAULT, argp->acl_default) );
117 } 121 }
122 if (!nfserr) {
123 nfserr = fh_getattr(fh, &resp->stat);
124 }
118 125
119 /* argp->acl_{access,default} may have been allocated in 126 /* argp->acl_{access,default} may have been allocated in
120 nfssvc_decode_setaclargs. */ 127 nfssvc_decode_setaclargs. */
@@ -129,10 +136,15 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
129static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, 136static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
130 struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) 137 struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
131{ 138{
139 __be32 nfserr;
132 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 140 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
133 141
134 fh_copy(&resp->fh, &argp->fh); 142 fh_copy(&resp->fh, &argp->fh);
135 return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); 143 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
144 if (nfserr)
145 return nfserr;
146 nfserr = fh_getattr(&resp->fh, &resp->stat);
147 return nfserr;
136} 148}
137 149
138/* 150/*
@@ -150,6 +162,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
150 fh_copy(&resp->fh, &argp->fh); 162 fh_copy(&resp->fh, &argp->fh);
151 resp->access = argp->access; 163 resp->access = argp->access;
152 nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); 164 nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
165 if (nfserr)
166 return nfserr;
167 nfserr = fh_getattr(&resp->fh, &resp->stat);
153 return nfserr; 168 return nfserr;
154} 169}
155 170
@@ -243,7 +258,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
243 return 0; 258 return 0;
244 inode = dentry->d_inode; 259 inode = dentry->d_inode;
245 260
246 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 261 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
247 *p++ = htonl(resp->mask); 262 *p++ = htonl(resp->mask);
248 if (!xdr_ressize_check(rqstp, p)) 263 if (!xdr_ressize_check(rqstp, p))
249 return 0; 264 return 0;
@@ -274,7 +289,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
274static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, 289static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
275 struct nfsd_attrstat *resp) 290 struct nfsd_attrstat *resp)
276{ 291{
277 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 292 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
278 return xdr_ressize_check(rqstp, p); 293 return xdr_ressize_check(rqstp, p);
279} 294}
280 295
@@ -282,7 +297,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
282static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, 297static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
283 struct nfsd3_accessres *resp) 298 struct nfsd3_accessres *resp)
284{ 299{
285 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 300 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
286 *p++ = htonl(resp->access); 301 *p++ = htonl(resp->access);
287 return xdr_ressize_check(rqstp, p); 302 return xdr_ressize_check(rqstp, p);
288} 303}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1fc02dfdc5c4..401289913130 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -43,7 +43,6 @@ static __be32
43nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, 43nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
44 struct nfsd3_attrstat *resp) 44 struct nfsd3_attrstat *resp)
45{ 45{
46 int err;
47 __be32 nfserr; 46 __be32 nfserr;
48 47
49 dprintk("nfsd: GETATTR(3) %s\n", 48 dprintk("nfsd: GETATTR(3) %s\n",
@@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
55 if (nfserr) 54 if (nfserr)
56 RETURN_STATUS(nfserr); 55 RETURN_STATUS(nfserr);
57 56
58 err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, 57 nfserr = fh_getattr(&resp->fh, &resp->stat);
59 resp->fh.fh_dentry, &resp->stat);
60 nfserr = nfserrno(err);
61 58
62 RETURN_STATUS(nfserr); 59 RETURN_STATUS(nfserr);
63} 60}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 324c0baf7cda..14d9ecb96cff 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -11,6 +11,7 @@
11#include "xdr3.h" 11#include "xdr3.h"
12#include "auth.h" 12#include "auth.h"
13#include "netns.h" 13#include "netns.h"
14#include "vfs.h"
14 15
15#define NFSDDBG_FACILITY NFSDDBG_XDR 16#define NFSDDBG_FACILITY NFSDDBG_XDR
16 17
@@ -105,12 +106,14 @@ decode_sattr3(__be32 *p, struct iattr *iap)
105 iap->ia_mode = ntohl(*p++); 106 iap->ia_mode = ntohl(*p++);
106 } 107 }
107 if (*p++) { 108 if (*p++) {
108 iap->ia_valid |= ATTR_UID; 109 iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
109 iap->ia_uid = ntohl(*p++); 110 if (uid_valid(iap->ia_uid))
111 iap->ia_valid |= ATTR_UID;
110 } 112 }
111 if (*p++) { 113 if (*p++) {
112 iap->ia_valid |= ATTR_GID; 114 iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
113 iap->ia_gid = ntohl(*p++); 115 if (gid_valid(iap->ia_gid))
116 iap->ia_valid |= ATTR_GID;
114 } 117 }
115 if (*p++) { 118 if (*p++) {
116 u64 newsize; 119 u64 newsize;
@@ -167,8 +170,8 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
167 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); 170 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
168 *p++ = htonl((u32) stat->mode); 171 *p++ = htonl((u32) stat->mode);
169 *p++ = htonl((u32) stat->nlink); 172 *p++ = htonl((u32) stat->nlink);
170 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 173 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
171 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 174 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
172 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { 175 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
173 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); 176 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
174 } else { 177 } else {
@@ -204,10 +207,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
204{ 207{
205 struct dentry *dentry = fhp->fh_dentry; 208 struct dentry *dentry = fhp->fh_dentry;
206 if (dentry && dentry->d_inode) { 209 if (dentry && dentry->d_inode) {
207 int err; 210 __be32 err;
208 struct kstat stat; 211 struct kstat stat;
209 212
210 err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); 213 err = fh_getattr(fhp, &stat);
211 if (!err) { 214 if (!err) {
212 *p++ = xdr_one; /* attributes follow */ 215 *p++ = xdr_one; /* attributes follow */
213 lease_get_mtime(dentry->d_inode, &stat.mtime); 216 lease_get_mtime(dentry->d_inode, &stat.mtime);
@@ -254,13 +257,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
254 */ 257 */
255void fill_post_wcc(struct svc_fh *fhp) 258void fill_post_wcc(struct svc_fh *fhp)
256{ 259{
257 int err; 260 __be32 err;
258 261
259 if (fhp->fh_post_saved) 262 if (fhp->fh_post_saved)
260 printk("nfsd: inode locked twice during operation.\n"); 263 printk("nfsd: inode locked twice during operation.\n");
261 264
262 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, 265 err = fh_getattr(fhp, &fhp->fh_post_attr);
263 &fhp->fh_post_attr);
264 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; 266 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
265 if (err) { 267 if (err) {
266 fhp->fh_post_saved = 0; 268 fhp->fh_post_saved = 0;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 9c51aff02ae2..8a50b3c18093 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -264,7 +264,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
264 ace->flag = eflag; 264 ace->flag = eflag;
265 ace->access_mask = deny_mask_from_posix(deny, flags); 265 ace->access_mask = deny_mask_from_posix(deny, flags);
266 ace->whotype = NFS4_ACL_WHO_NAMED; 266 ace->whotype = NFS4_ACL_WHO_NAMED;
267 ace->who = pa->e_id; 267 ace->who_uid = pa->e_uid;
268 ace++; 268 ace++;
269 acl->naces++; 269 acl->naces++;
270 } 270 }
@@ -273,7 +273,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
274 flags); 274 flags);
275 ace->whotype = NFS4_ACL_WHO_NAMED; 275 ace->whotype = NFS4_ACL_WHO_NAMED;
276 ace->who = pa->e_id; 276 ace->who_uid = pa->e_uid;
277 ace++; 277 ace++;
278 acl->naces++; 278 acl->naces++;
279 pa++; 279 pa++;
@@ -300,7 +300,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
301 flags); 301 flags);
302 ace->whotype = NFS4_ACL_WHO_NAMED; 302 ace->whotype = NFS4_ACL_WHO_NAMED;
303 ace->who = pa->e_id; 303 ace->who_gid = pa->e_gid;
304 ace++; 304 ace++;
305 acl->naces++; 305 acl->naces++;
306 pa++; 306 pa++;
@@ -329,7 +329,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
330 ace->access_mask = deny_mask_from_posix(deny, flags); 330 ace->access_mask = deny_mask_from_posix(deny, flags);
331 ace->whotype = NFS4_ACL_WHO_NAMED; 331 ace->whotype = NFS4_ACL_WHO_NAMED;
332 ace->who = pa->e_id; 332 ace->who_gid = pa->e_gid;
333 ace++; 333 ace++;
334 acl->naces++; 334 acl->naces++;
335 } 335 }
@@ -345,6 +345,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
345 acl->naces++; 345 acl->naces++;
346} 346}
347 347
348static bool
349pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
350{
351 if (pace1->e_tag != pace2->e_tag)
352 return pace1->e_tag > pace2->e_tag;
353 if (pace1->e_tag == ACL_USER)
354 return uid_gt(pace1->e_uid, pace2->e_uid);
355 if (pace1->e_tag == ACL_GROUP)
356 return gid_gt(pace1->e_gid, pace2->e_gid);
357 return false;
358}
359
348static void 360static void
349sort_pacl_range(struct posix_acl *pacl, int start, int end) { 361sort_pacl_range(struct posix_acl *pacl, int start, int end) {
350 int sorted = 0, i; 362 int sorted = 0, i;
@@ -355,8 +367,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
355 while (!sorted) { 367 while (!sorted) {
356 sorted = 1; 368 sorted = 1;
357 for (i = start; i < end; i++) { 369 for (i = start; i < end; i++) {
358 if (pacl->a_entries[i].e_id 370 if (pace_gt(&pacl->a_entries[i],
359 > pacl->a_entries[i+1].e_id) { 371 &pacl->a_entries[i+1])) {
360 sorted = 0; 372 sorted = 0;
361 tmp = pacl->a_entries[i]; 373 tmp = pacl->a_entries[i];
362 pacl->a_entries[i] = pacl->a_entries[i+1]; 374 pacl->a_entries[i] = pacl->a_entries[i+1];
@@ -398,7 +410,10 @@ struct posix_ace_state {
398}; 410};
399 411
400struct posix_user_ace_state { 412struct posix_user_ace_state {
401 uid_t uid; 413 union {
414 kuid_t uid;
415 kgid_t gid;
416 };
402 struct posix_ace_state perms; 417 struct posix_ace_state perms;
403}; 418};
404 419
@@ -521,7 +536,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
521 if (error) 536 if (error)
522 goto out_err; 537 goto out_err;
523 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); 538 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
524 pace->e_id = ACL_UNDEFINED_ID;
525 539
526 for (i=0; i < state->users->n; i++) { 540 for (i=0; i < state->users->n; i++) {
527 pace++; 541 pace++;
@@ -531,7 +545,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
531 goto out_err; 545 goto out_err;
532 low_mode_from_nfs4(state->users->aces[i].perms.allow, 546 low_mode_from_nfs4(state->users->aces[i].perms.allow,
533 &pace->e_perm, flags); 547 &pace->e_perm, flags);
534 pace->e_id = state->users->aces[i].uid; 548 pace->e_uid = state->users->aces[i].uid;
535 add_to_mask(state, &state->users->aces[i].perms); 549 add_to_mask(state, &state->users->aces[i].perms);
536 } 550 }
537 551
@@ -541,7 +555,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
541 if (error) 555 if (error)
542 goto out_err; 556 goto out_err;
543 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); 557 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
544 pace->e_id = ACL_UNDEFINED_ID;
545 add_to_mask(state, &state->group); 558 add_to_mask(state, &state->group);
546 559
547 for (i=0; i < state->groups->n; i++) { 560 for (i=0; i < state->groups->n; i++) {
@@ -552,14 +565,13 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
552 goto out_err; 565 goto out_err;
553 low_mode_from_nfs4(state->groups->aces[i].perms.allow, 566 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
554 &pace->e_perm, flags); 567 &pace->e_perm, flags);
555 pace->e_id = state->groups->aces[i].uid; 568 pace->e_gid = state->groups->aces[i].gid;
556 add_to_mask(state, &state->groups->aces[i].perms); 569 add_to_mask(state, &state->groups->aces[i].perms);
557 } 570 }
558 571
559 pace++; 572 pace++;
560 pace->e_tag = ACL_MASK; 573 pace->e_tag = ACL_MASK;
561 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); 574 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
562 pace->e_id = ACL_UNDEFINED_ID;
563 575
564 pace++; 576 pace++;
565 pace->e_tag = ACL_OTHER; 577 pace->e_tag = ACL_OTHER;
@@ -567,7 +579,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
567 if (error) 579 if (error)
568 goto out_err; 580 goto out_err;
569 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); 581 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
570 pace->e_id = ACL_UNDEFINED_ID;
571 582
572 return pacl; 583 return pacl;
573out_err: 584out_err:
@@ -587,12 +598,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
587 astate->deny |= mask & ~astate->allow; 598 astate->deny |= mask & ~astate->allow;
588} 599}
589 600
590static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) 601static int find_uid(struct posix_acl_state *state, kuid_t uid)
591{ 602{
603 struct posix_ace_state_array *a = state->users;
592 int i; 604 int i;
593 605
594 for (i = 0; i < a->n; i++) 606 for (i = 0; i < a->n; i++)
595 if (a->aces[i].uid == uid) 607 if (uid_eq(a->aces[i].uid, uid))
596 return i; 608 return i;
597 /* Not found: */ 609 /* Not found: */
598 a->n++; 610 a->n++;
@@ -603,6 +615,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array
603 return i; 615 return i;
604} 616}
605 617
618static int find_gid(struct posix_acl_state *state, kgid_t gid)
619{
620 struct posix_ace_state_array *a = state->groups;
621 int i;
622
623 for (i = 0; i < a->n; i++)
624 if (gid_eq(a->aces[i].gid, gid))
625 return i;
626 /* Not found: */
627 a->n++;
628 a->aces[i].gid = gid;
629 a->aces[i].perms.allow = state->everyone.allow;
630 a->aces[i].perms.deny = state->everyone.deny;
631
632 return i;
633}
634
606static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) 635static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
607{ 636{
608 int i; 637 int i;
@@ -636,7 +665,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
636 } 665 }
637 break; 666 break;
638 case ACL_USER: 667 case ACL_USER:
639 i = find_uid(state, state->users, ace->who); 668 i = find_uid(state, ace->who_uid);
640 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 669 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
641 allow_bits(&state->users->aces[i].perms, mask); 670 allow_bits(&state->users->aces[i].perms, mask);
642 } else { 671 } else {
@@ -658,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
658 } 687 }
659 break; 688 break;
660 case ACL_GROUP: 689 case ACL_GROUP:
661 i = find_uid(state, state->groups, ace->who); 690 i = find_gid(state, ace->who_gid);
662 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 691 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
663 allow_bits(&state->groups->aces[i].perms, mask); 692 allow_bits(&state->groups->aces[i].perms, mask);
664 } else { 693 } else {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index d9402ea9d751..4832fd819f88 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -65,7 +65,7 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
65struct ent { 65struct ent {
66 struct cache_head h; 66 struct cache_head h;
67 int type; /* User / Group */ 67 int type; /* User / Group */
68 uid_t id; 68 u32 id;
69 char name[IDMAP_NAMESZ]; 69 char name[IDMAP_NAMESZ];
70 char authname[IDMAP_NAMESZ]; 70 char authname[IDMAP_NAMESZ];
71}; 71};
@@ -528,7 +528,7 @@ rqst_authname(struct svc_rqst *rqstp)
528 528
529static __be32 529static __be32
530idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, 530idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
531 uid_t *id) 531 u32 *id)
532{ 532{
533 struct ent *item, key = { 533 struct ent *item, key = {
534 .type = type, 534 .type = type,
@@ -552,7 +552,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
552} 552}
553 553
554static int 554static int
555idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 555idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
556{ 556{
557 struct ent *item, key = { 557 struct ent *item, key = {
558 .id = id, 558 .id = id,
@@ -575,7 +575,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
575} 575}
576 576
577static bool 577static bool
578numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 578numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
579{ 579{
580 int ret; 580 int ret;
581 char buf[11]; 581 char buf[11];
@@ -591,7 +591,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
591} 591}
592 592
593static __be32 593static __be32
594do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 594do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
595{ 595{
596 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 596 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
597 if (numeric_name_to_id(rqstp, type, name, namelen, id)) 597 if (numeric_name_to_id(rqstp, type, name, namelen, id))
@@ -604,7 +604,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
604} 604}
605 605
606static int 606static int
607do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 607do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
608{ 608{
609 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 609 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
610 return sprintf(name, "%u", id); 610 return sprintf(name, "%u", id);
@@ -613,26 +613,40 @@ do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
613 613
614__be32 614__be32
615nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, 615nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
616 __u32 *id) 616 kuid_t *uid)
617{ 617{
618 return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); 618 __be32 status;
619 u32 id = -1;
620 status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
621 *uid = make_kuid(&init_user_ns, id);
622 if (!uid_valid(*uid))
623 status = nfserr_badowner;
624 return status;
619} 625}
620 626
621__be32 627__be32
622nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, 628nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
623 __u32 *id) 629 kgid_t *gid)
624{ 630{
625 return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); 631 __be32 status;
632 u32 id = -1;
633 status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
634 *gid = make_kgid(&init_user_ns, id);
635 if (!gid_valid(*gid))
636 status = nfserr_badowner;
637 return status;
626} 638}
627 639
628int 640int
629nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 641nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name)
630{ 642{
643 u32 id = from_kuid(&init_user_ns, uid);
631 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); 644 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
632} 645}
633 646
634int 647int
635nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 648nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name)
636{ 649{
650 u32 id = from_kgid(&init_user_ns, gid);
637 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); 651 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
638} 652}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e0ae1cf18a82..899ca26dd194 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -73,8 +73,8 @@ nfs4_save_creds(const struct cred **original_creds)
73 if (!new) 73 if (!new)
74 return -ENOMEM; 74 return -ENOMEM;
75 75
76 new->fsuid = 0; 76 new->fsuid = GLOBAL_ROOT_UID;
77 new->fsgid = 0; 77 new->fsgid = GLOBAL_ROOT_GID;
78 *original_creds = override_creds(new); 78 *original_creds = override_creds(new);
79 put_cred(new); 79 put_cred(new);
80 return 0; 80 return 0;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f194f869be4c..16d39c6c4fbb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -151,7 +151,7 @@ get_nfs4_file(struct nfs4_file *fi)
151} 151}
152 152
153static int num_delegations; 153static int num_delegations;
154unsigned int max_delegations; 154unsigned long max_delegations;
155 155
156/* 156/*
157 * Open owner state (share locks) 157 * Open owner state (share locks)
@@ -719,8 +719,8 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
719 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); 719 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
720 720
721 spin_lock(&nfsd_drc_lock); 721 spin_lock(&nfsd_drc_lock);
722 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, 722 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
723 nfsd_drc_max_mem - nfsd_drc_mem_used); 723 nfsd_drc_max_mem - nfsd_drc_mem_used);
724 num = min_t(int, num, avail / slotsize); 724 num = min_t(int, num, avail / slotsize);
725 nfsd_drc_mem_used += num * slotsize; 725 nfsd_drc_mem_used += num * slotsize;
726 spin_unlock(&nfsd_drc_lock); 726 spin_unlock(&nfsd_drc_lock);
@@ -1079,7 +1079,6 @@ free_client(struct nfs4_client *clp)
1079 } 1079 }
1080 free_svc_cred(&clp->cl_cred); 1080 free_svc_cred(&clp->cl_cred);
1081 kfree(clp->cl_name.data); 1081 kfree(clp->cl_name.data);
1082 idr_remove_all(&clp->cl_stateids);
1083 idr_destroy(&clp->cl_stateids); 1082 idr_destroy(&clp->cl_stateids);
1084 kfree(clp); 1083 kfree(clp);
1085} 1084}
@@ -1223,7 +1222,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
1223 if (g1->ngroups != g2->ngroups) 1222 if (g1->ngroups != g2->ngroups)
1224 return false; 1223 return false;
1225 for (i=0; i<g1->ngroups; i++) 1224 for (i=0; i<g1->ngroups; i++)
1226 if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) 1225 if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
1227 return false; 1226 return false;
1228 return true; 1227 return true;
1229} 1228}
@@ -1248,8 +1247,8 @@ static bool
1248same_creds(struct svc_cred *cr1, struct svc_cred *cr2) 1247same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
1249{ 1248{
1250 if ((is_gss_cred(cr1) != is_gss_cred(cr2)) 1249 if ((is_gss_cred(cr1) != is_gss_cred(cr2))
1251 || (cr1->cr_uid != cr2->cr_uid) 1250 || (!uid_eq(cr1->cr_uid, cr2->cr_uid))
1252 || (cr1->cr_gid != cr2->cr_gid) 1251 || (!gid_eq(cr1->cr_gid, cr2->cr_gid))
1253 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) 1252 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
1254 return false; 1253 return false;
1255 if (cr1->cr_principal == cr2->cr_principal) 1254 if (cr1->cr_principal == cr2->cr_principal)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fcb5bed99c33..01168865dd37 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -293,13 +293,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32); 293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
294 status = nfs_ok; 294 status = nfs_ok;
295 if (ace->whotype != NFS4_ACL_WHO_NAMED) 295 if (ace->whotype != NFS4_ACL_WHO_NAMED)
296 ace->who = 0; 296 ;
297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) 297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
298 status = nfsd_map_name_to_gid(argp->rqstp, 298 status = nfsd_map_name_to_gid(argp->rqstp,
299 buf, dummy32, &ace->who); 299 buf, dummy32, &ace->who_gid);
300 else 300 else
301 status = nfsd_map_name_to_uid(argp->rqstp, 301 status = nfsd_map_name_to_uid(argp->rqstp,
302 buf, dummy32, &ace->who); 302 buf, dummy32, &ace->who_uid);
303 if (status) 303 if (status)
304 return status; 304 return status;
305 } 305 }
@@ -464,9 +464,16 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
464 READ32(dummy); 464 READ32(dummy);
465 READ_BUF(dummy * 4); 465 READ_BUF(dummy * 4);
466 if (cbs->flavor == (u32)(-1)) { 466 if (cbs->flavor == (u32)(-1)) {
467 cbs->uid = uid; 467 kuid_t kuid = make_kuid(&init_user_ns, uid);
468 cbs->gid = gid; 468 kgid_t kgid = make_kgid(&init_user_ns, gid);
469 cbs->flavor = RPC_AUTH_UNIX; 469 if (uid_valid(kuid) && gid_valid(kgid)) {
470 cbs->uid = kuid;
471 cbs->gid = kgid;
472 cbs->flavor = RPC_AUTH_UNIX;
473 } else {
474 dprintk("RPC_AUTH_UNIX with invalid"
475 "uid or gid ignoring!\n");
476 }
470 } 477 }
471 break; 478 break;
472 case RPC_AUTH_GSS: 479 case RPC_AUTH_GSS:
@@ -1926,7 +1933,7 @@ static u32 nfs4_file_type(umode_t mode)
1926} 1933}
1927 1934
1928static __be32 1935static __be32
1929nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1936nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
1930 __be32 **p, int *buflen) 1937 __be32 **p, int *buflen)
1931{ 1938{
1932 int status; 1939 int status;
@@ -1935,10 +1942,10 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1935 return nfserr_resource; 1942 return nfserr_resource;
1936 if (whotype != NFS4_ACL_WHO_NAMED) 1943 if (whotype != NFS4_ACL_WHO_NAMED)
1937 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); 1944 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
1938 else if (group) 1945 else if (gid_valid(gid))
1939 status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); 1946 status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
1940 else 1947 else
1941 status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); 1948 status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
1942 if (status < 0) 1949 if (status < 0)
1943 return nfserrno(status); 1950 return nfserrno(status);
1944 *p = xdr_encode_opaque(*p, NULL, status); 1951 *p = xdr_encode_opaque(*p, NULL, status);
@@ -1948,22 +1955,33 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1948} 1955}
1949 1956
1950static inline __be32 1957static inline __be32
1951nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) 1958nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
1952{ 1959{
1953 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); 1960 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
1961 p, buflen);
1954} 1962}
1955 1963
1956static inline __be32 1964static inline __be32
1957nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) 1965nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
1958{ 1966{
1959 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); 1967 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
1968 p, buflen);
1960} 1969}
1961 1970
1962static inline __be32 1971static inline __be32
1963nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1972nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
1964 __be32 **p, int *buflen) 1973 __be32 **p, int *buflen)
1965{ 1974{
1966 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); 1975 kuid_t uid = INVALID_UID;
1976 kgid_t gid = INVALID_GID;
1977
1978 if (ace->whotype == NFS4_ACL_WHO_NAMED) {
1979 if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
1980 gid = ace->who_gid;
1981 else
1982 uid = ace->who_uid;
1983 }
1984 return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
1967} 1985}
1968 1986
1969#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 1987#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -1997,7 +2015,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
1997 if (path.dentry != path.mnt->mnt_root) 2015 if (path.dentry != path.mnt->mnt_root)
1998 break; 2016 break;
1999 } 2017 }
2000 err = vfs_getattr(path.mnt, path.dentry, stat); 2018 err = vfs_getattr(&path, stat);
2001 path_put(&path); 2019 path_put(&path);
2002 return err; 2020 return err;
2003} 2021}
@@ -2049,7 +2067,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2049 goto out; 2067 goto out;
2050 } 2068 }
2051 2069
2052 err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); 2070 err = vfs_getattr(&path, &stat);
2053 if (err) 2071 if (err)
2054 goto out_nfserr; 2072 goto out_nfserr;
2055 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | 2073 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
@@ -2223,9 +2241,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2223 WRITE32(ace->type); 2241 WRITE32(ace->type);
2224 WRITE32(ace->flag); 2242 WRITE32(ace->flag);
2225 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); 2243 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
2226 status = nfsd4_encode_aclname(rqstp, ace->whotype, 2244 status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
2227 ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
2228 &p, &buflen);
2229 if (status == nfserr_resource) 2245 if (status == nfserr_resource)
2230 goto out_resource; 2246 goto out_resource;
2231 if (status) 2247 if (status)
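
The nfs4xdr.c hunks above call vfs_getattr() in its two-argument form, which takes a struct path instead of a separate vfsmount/dentry pair. A minimal sketch under that assumption; the wrapper name is made up for illustration:

#include <linux/fs.h>
#include <linux/path.h>
#include <linux/stat.h>

static int stat_export_object(struct vfsmount *mnt, struct dentry *dentry,
			      struct kstat *stat)
{
	struct path path = { .mnt = mnt, .dentry = dentry };

	/* was: vfs_getattr(mnt, dentry, stat) */
	return vfs_getattr(&path, stat);
}
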
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ca43664422f6..62c1ee128aeb 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -281,7 +281,6 @@ static struct svc_cacherep *
281nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) 281nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
282{ 282{
283 struct svc_cacherep *rp; 283 struct svc_cacherep *rp;
284 struct hlist_node *hn;
285 struct hlist_head *rh; 284 struct hlist_head *rh;
286 __be32 xid = rqstp->rq_xid; 285 __be32 xid = rqstp->rq_xid;
287 u32 proto = rqstp->rq_prot, 286 u32 proto = rqstp->rq_prot,
@@ -289,7 +288,7 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
289 proc = rqstp->rq_proc; 288 proc = rqstp->rq_proc;
290 289
291 rh = &cache_hash[request_hash(xid)]; 290 rh = &cache_hash[request_hash(xid)];
292 hlist_for_each_entry(rp, hn, rh, c_hash) { 291 hlist_for_each_entry(rp, rh, c_hash) {
293 if (xid == rp->c_xid && proc == rp->c_proc && 292 if (xid == rp->c_xid && proc == rp->c_proc &&
294 proto == rp->c_prot && vers == rp->c_vers && 293 proto == rp->c_prot && vers == rp->c_vers &&
295 rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && 294 rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum &&
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8ead2c25ce65..13a21c8fca49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -85,7 +85,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
85 85
86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) 86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
87{ 87{
88 ino_t ino = file->f_path.dentry->d_inode->i_ino; 88 ino_t ino = file_inode(file)->i_ino;
89 char *data; 89 char *data;
90 ssize_t rv; 90 ssize_t rv;
91 91
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index de23db255c69..07a473fd49bc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -56,8 +56,8 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern spinlock_t nfsd_drc_lock; 58extern spinlock_t nfsd_drc_lock;
59extern unsigned int nfsd_drc_max_mem; 59extern unsigned long nfsd_drc_max_mem;
60extern unsigned int nfsd_drc_mem_used; 60extern unsigned long nfsd_drc_mem_used;
61 61
62extern const struct seq_operations nfs_exports_op; 62extern const struct seq_operations nfs_exports_op;
63 63
@@ -106,7 +106,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
106 * NFSv4 State 106 * NFSv4 State
107 */ 107 */
108#ifdef CONFIG_NFSD_V4 108#ifdef CONFIG_NFSD_V4
109extern unsigned int max_delegations; 109extern unsigned long max_delegations;
110void nfs4_state_init(void); 110void nfs4_state_init(void);
111int nfsd4_init_slabs(void); 111int nfsd4_init_slabs(void);
112void nfsd4_free_slabs(void); 112void nfsd4_free_slabs(void);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aad6d457b9e8..54c6b3d3cc79 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -26,17 +26,13 @@ static __be32
26nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) 26nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
27{ 27{
28 if (err) return err; 28 if (err) return err;
29 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 29 return fh_getattr(&resp->fh, &resp->stat);
30 resp->fh.fh_dentry,
31 &resp->stat));
32} 30}
33static __be32 31static __be32
34nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) 32nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
35{ 33{
36 if (err) return err; 34 if (err) return err;
37 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 35 return fh_getattr(&resp->fh, &resp->stat);
38 resp->fh.fh_dentry,
39 &resp->stat));
40} 36}
41/* 37/*
42 * Get a file's attributes 38 * Get a file's attributes
@@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
150 &resp->count); 146 &resp->count);
151 147
152 if (nfserr) return nfserr; 148 if (nfserr) return nfserr;
153 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 149 return fh_getattr(&resp->fh, &resp->stat);
154 resp->fh.fh_dentry,
155 &resp->stat));
156} 150}
157 151
158/* 152/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6cee5db72047..262df5ccbf59 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex);
59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
60 */ 60 */
61spinlock_t nfsd_drc_lock; 61spinlock_t nfsd_drc_lock;
62unsigned int nfsd_drc_max_mem; 62unsigned long nfsd_drc_max_mem;
63unsigned int nfsd_drc_mem_used; 63unsigned long nfsd_drc_mem_used;
64 64
65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
66static struct svc_stat nfsd_acl_svcstats; 66static struct svc_stat nfsd_acl_svcstats;
@@ -342,7 +342,7 @@ static void set_max_drc(void)
342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
343 nfsd_drc_mem_used = 0; 343 nfsd_drc_mem_used = 0;
344 spin_lock_init(&nfsd_drc_lock); 344 spin_lock_init(&nfsd_drc_lock);
345 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 345 dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
346} 346}
347 347
348static int nfsd_get_default_max_blksize(void) 348static int nfsd_get_default_max_blksize(void)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 979b42106979..9c769a47ac5a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -4,6 +4,7 @@
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */ 5 */
6 6
7#include "vfs.h"
7#include "xdr.h" 8#include "xdr.h"
8#include "auth.h" 9#include "auth.h"
9 10
@@ -100,12 +101,14 @@ decode_sattr(__be32 *p, struct iattr *iap)
100 iap->ia_mode = tmp; 101 iap->ia_mode = tmp;
101 } 102 }
102 if ((tmp = ntohl(*p++)) != (u32)-1) { 103 if ((tmp = ntohl(*p++)) != (u32)-1) {
103 iap->ia_valid |= ATTR_UID; 104 iap->ia_uid = make_kuid(&init_user_ns, tmp);
104 iap->ia_uid = tmp; 105 if (uid_valid(iap->ia_uid))
106 iap->ia_valid |= ATTR_UID;
105 } 107 }
106 if ((tmp = ntohl(*p++)) != (u32)-1) { 108 if ((tmp = ntohl(*p++)) != (u32)-1) {
107 iap->ia_valid |= ATTR_GID; 109 iap->ia_gid = make_kgid(&init_user_ns, tmp);
108 iap->ia_gid = tmp; 110 if (gid_valid(iap->ia_gid))
111 iap->ia_valid |= ATTR_GID;
109 } 112 }
110 if ((tmp = ntohl(*p++)) != (u32)-1) { 113 if ((tmp = ntohl(*p++)) != (u32)-1) {
111 iap->ia_valid |= ATTR_SIZE; 114 iap->ia_valid |= ATTR_SIZE;
@@ -151,8 +154,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
151 *p++ = htonl(nfs_ftypes[type >> 12]); 154 *p++ = htonl(nfs_ftypes[type >> 12]);
152 *p++ = htonl((u32) stat->mode); 155 *p++ = htonl((u32) stat->mode);
153 *p++ = htonl((u32) stat->nlink); 156 *p++ = htonl((u32) stat->nlink);
154 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
155 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
156 159
157 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { 160 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
158 *p++ = htonl(NFS_MAXPATHLEN); 161 *p++ = htonl(NFS_MAXPATHLEN);
@@ -194,11 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
194} 197}
195 198
196/* Helper function for NFSv2 ACL code */ 199/* Helper function for NFSv2 ACL code */
197__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) 200__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat)
198{ 201{
199 struct kstat stat; 202 return encode_fattr(rqstp, p, fhp, stat);
200 vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat);
201 return encode_fattr(rqstp, p, fhp, &stat);
202} 203}
203 204
204/* 205/*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d1c229feed52..1a8c7391f7ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -152,8 +152,8 @@ struct nfsd4_channel_attrs {
152 152
153struct nfsd4_cb_sec { 153struct nfsd4_cb_sec {
154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ 154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */
155 u32 uid; 155 kuid_t uid;
156 u32 gid; 156 kgid_t gid;
157}; 157};
158 158
159struct nfsd4_create_session { 159struct nfsd4_create_session {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d586117fa94a..2a7eb536de0b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -401,8 +401,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
401 401
402 /* Revoke setuid/setgid on chown */ 402 /* Revoke setuid/setgid on chown */
403 if (!S_ISDIR(inode->i_mode) && 403 if (!S_ISDIR(inode->i_mode) &&
404 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 404 (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) ||
405 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { 405 ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) {
406 iap->ia_valid |= ATTR_KILL_PRIV; 406 iap->ia_valid |= ATTR_KILL_PRIV;
407 if (iap->ia_valid & ATTR_MODE) { 407 if (iap->ia_valid & ATTR_MODE) {
408 /* we're setting mode too, just clear the s*id bits */ 408 /* we're setting mode too, just clear the s*id bits */
@@ -979,7 +979,7 @@ static void kill_suid(struct dentry *dentry)
979 */ 979 */
980static int wait_for_concurrent_writes(struct file *file) 980static int wait_for_concurrent_writes(struct file *file)
981{ 981{
982 struct inode *inode = file->f_path.dentry->d_inode; 982 struct inode *inode = file_inode(file);
983 static ino_t last_ino; 983 static ino_t last_ino;
984 static dev_t last_dev; 984 static dev_t last_dev;
985 int err = 0; 985 int err = 0;
@@ -1070,7 +1070,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1070 if (err) 1070 if (err)
1071 return err; 1071 return err;
1072 1072
1073 inode = file->f_path.dentry->d_inode; 1073 inode = file_inode(file);
1074 1074
1075 /* Get readahead parameters */ 1075 /* Get readahead parameters */
1076 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); 1076 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
@@ -1205,7 +1205,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1205 * send along the gid on create when it tries to implement 1205 * send along the gid on create when it tries to implement
1206 * setgid directories via NFS: 1206 * setgid directories via NFS:
1207 */ 1207 */
1208 if (current_fsuid() != 0) 1208 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1210 if (iap->ia_valid) 1210 if (iap->ia_valid)
1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@ -1957,7 +1957,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1957 offset = *offsetp; 1957 offset = *offsetp;
1958 1958
1959 while (1) { 1959 while (1) {
1960 struct inode *dir_inode = file->f_path.dentry->d_inode; 1960 struct inode *dir_inode = file_inode(file);
1961 unsigned int reclen; 1961 unsigned int reclen;
1962 1962
1963 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1963 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -2150,7 +2150,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2150 * with NFSv3. 2150 * with NFSv3.
2151 */ 2151 */
2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2153 inode->i_uid == current_fsuid()) 2153 uid_eq(inode->i_uid, current_fsuid()))
2154 return 0; 2154 return 0;
2155 2155
2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
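The vfs.c hunks above show the comparison side of the same kuid_t/kgid_t conversion: the types are opaque, so == and != become uid_eq()/gid_eq(), and the literal root check against 0 becomes a comparison with GLOBAL_ROOT_UID. A minimal sketch of both checks:

    #include <linux/uidgid.h>
    #include <linux/cred.h>

            /* was: current_fsuid() != 0 */
            if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
                    return -EPERM;

            /* was: inode->i_uid == current_fsuid() */
            if (uid_eq(inode->i_uid, current_fsuid()))
                    return 0;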
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 359594c393d2..5b5894159f22 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -6,6 +6,7 @@
6#define LINUX_NFSD_VFS_H 6#define LINUX_NFSD_VFS_H
7 7
8#include "nfsfh.h" 8#include "nfsfh.h"
9#include "nfsd.h"
9 10
10/* 11/*
11 * Flags for nfsd_permission 12 * Flags for nfsd_permission
@@ -125,4 +126,11 @@ static inline void fh_drop_write(struct svc_fh *fh)
125 } 126 }
126} 127}
127 128
129static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
130{
131 struct path p = {.mnt = fh->fh_export->ex_path.mnt,
132 .dentry = fh->fh_dentry};
133 return nfserrno(vfs_getattr(&p, stat));
134}
135
128#endif /* LINUX_NFSD_VFS_H */ 136#endif /* LINUX_NFSD_VFS_H */
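The fh_getattr() inline added above wraps the new vfs_getattr() prototype, which now takes a struct path rather than a separate vfsmount/dentry pair; the nfsproc.c and nfsxdr.c hunks earlier in this diff are its callers. A sketch of a call site, assuming an already-verified svc_fh:

            struct kstat stat;
            __be32 err;

            err = fh_getattr(fhp, &stat);   /* builds the struct path and calls vfs_getattr() */
            if (err)
                    return err;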
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 53b1863dd8f6..4f0481d63804 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -167,7 +167,7 @@ int nfssvc_encode_entry(void *, const char *name,
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); 167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168 168
169/* Helper functions for NFSv2 ACL code */ 169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); 170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); 171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172 172
173#endif /* LINUX_NFSD_H */ 173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 7df980eb0562..b6d5542a4ac8 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -136,6 +136,7 @@ struct nfsd3_accessres {
136 __be32 status; 136 __be32 status;
137 struct svc_fh fh; 137 struct svc_fh fh;
138 __u32 access; 138 __u32 access;
139 struct kstat stat;
139}; 140};
140 141
141struct nfsd3_readlinkres { 142struct nfsd3_readlinkres {
@@ -225,6 +226,7 @@ struct nfsd3_getaclres {
225 int mask; 226 int mask;
226 struct posix_acl *acl_access; 227 struct posix_acl *acl_access;
227 struct posix_acl *acl_default; 228 struct posix_acl *acl_default;
229 struct kstat stat;
228}; 230};
229 231
230/* dummy type for release */ 232/* dummy type for release */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..80da8eb27393 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,5 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support"
3 depends on EXPERIMENTAL
4 select CRC32 3 select CRC32
5 help 4 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 5 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index df1a7fb238d1..f30b017740a7 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -259,7 +259,7 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
259static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 259static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
260{ 260{
261 loff_t pos = filp->f_pos; 261 loff_t pos = filp->f_pos;
262 struct inode *inode = filp->f_dentry->d_inode; 262 struct inode *inode = file_inode(filp);
263 struct super_block *sb = inode->i_sb; 263 struct super_block *sb = inode->i_sb;
264 unsigned int offset = pos & ~PAGE_CACHE_MASK; 264 unsigned int offset = pos & ~PAGE_CACHE_MASK;
265 unsigned long n = pos >> PAGE_CACHE_SHIFT; 265 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..08fdb77852ac 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -67,7 +67,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
67static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 67static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
68{ 68{
69 struct page *page = vmf->page; 69 struct page *page = vmf->page;
70 struct inode *inode = vma->vm_file->f_dentry->d_inode; 70 struct inode *inode = file_inode(vma->vm_file);
71 struct nilfs_transaction_info ti; 71 struct nilfs_transaction_info ti;
72 int ret = 0; 72 int ret = 0;
73 73
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
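The nilfs_page_mkwrite() hunk above swaps wait_on_page_writeback() for wait_for_stable_page(), which blocks only when the backing device actually requires stable pages during writeback; the ocfs2_grab_pages_for_write() hunk later in this diff adds the same call. A rough sketch of the usual page_mkwrite tail (everything except the two named calls is a placeholder):

            lock_page(page);
            /* ... mark the page dirty / map its buffers ... */
            wait_for_stable_page(page);     /* waits only if the bdi demands stable writes */
            return VM_FAULT_LOCKED;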
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..b44bdb291b84 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
664 if (ret < 0) 664 if (ret < 0)
665 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
666 "cannot read source blocks: err=%d\n", ret); 666 "cannot read source blocks: err=%d\n", ret);
667 else 667 else {
668 if (nilfs_sb_need_update(nilfs))
669 set_nilfs_discontinued(nilfs);
668 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 670 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
671 }
669 672
670 nilfs_remove_all_gcinodes(nilfs); 673 nilfs_remove_all_gcinodes(nilfs);
671 clear_nilfs_gc_running(nilfs); 674 clear_nilfs_gc_running(nilfs);
@@ -793,7 +796,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
793 796
794long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 797long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
795{ 798{
796 struct inode *inode = filp->f_dentry->d_inode; 799 struct inode *inode = file_inode(filp);
797 void __user *argp = (void __user *)arg; 800 void __user *argp = (void __user *)arg;
798 801
799 switch (cmd) { 802 switch (cmd) {
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1d0c0b84c5a3..9de78f08989e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -517,11 +517,11 @@ static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
517 517
518 if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { 518 if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) {
519 *lenp = NILFS_FID_SIZE_CONNECTABLE; 519 *lenp = NILFS_FID_SIZE_CONNECTABLE;
520 return 255; 520 return FILEID_INVALID;
521 } 521 }
522 if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { 522 if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) {
523 *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; 523 *lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
524 return 255; 524 return FILEID_INVALID;
525 } 525 }
526 526
527 fid->cno = root->cno; 527 fid->cno = root->cno;
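Both this nilfs2 hunk and the ocfs2_encode_fh() hunk later in the diff replace the bare 255 return with FILEID_INVALID from <linux/exportfs.h>; the value is the same, the magic number just gains a name. A sketch of the buffer-too-small check (MY_FID_SIZE is a placeholder):

    #include <linux/exportfs.h>

            if (*lenp < MY_FID_SIZE) {
                    *lenp = MY_FID_SIZE;
                    return FILEID_INVALID;  /* was: return 255; */
            }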
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 08b886f119ce..2bfe6dc413a0 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -174,7 +174,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
174 struct dnotify_struct **prev; 174 struct dnotify_struct **prev;
175 struct inode *inode; 175 struct inode *inode;
176 176
177 inode = filp->f_path.dentry->d_inode; 177 inode = file_inode(filp);
178 if (!S_ISDIR(inode->i_mode)) 178 if (!S_ISDIR(inode->i_mode))
179 return; 179 return;
180 180
@@ -296,7 +296,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
296 } 296 }
297 297
298 /* dnotify only works on directories */ 298 /* dnotify only works on directories */
299 inode = filp->f_path.dentry->d_inode; 299 inode = file_inode(filp);
300 if (!S_ISDIR(inode->i_mode)) { 300 if (!S_ISDIR(inode->i_mode)) {
301 error = -ENOTDIR; 301 error = -ENOTDIR;
302 goto out_err; 302 goto out_err;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9ff4a5ee6e20..5d8444268a16 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -466,7 +466,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
466 466
467 ret = -ENOTDIR; 467 ret = -ENOTDIR;
468 if ((flags & FAN_MARK_ONLYDIR) && 468 if ((flags & FAN_MARK_ONLYDIR) &&
469 !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { 469 !(S_ISDIR(file_inode(f.file)->i_mode))) {
470 fdput(f); 470 fdput(f);
471 goto out; 471 goto out;
472 } 472 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 6baadb5a8430..4bb21d67d9b1 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
52void __fsnotify_update_child_dentry_flags(struct inode *inode) 52void __fsnotify_update_child_dentry_flags(struct inode *inode)
53{ 53{
54 struct dentry *alias; 54 struct dentry *alias;
55 struct hlist_node *p;
56 int watched; 55 int watched;
57 56
58 if (!S_ISDIR(inode->i_mode)) 57 if (!S_ISDIR(inode->i_mode))
@@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
64 spin_lock(&inode->i_lock); 63 spin_lock(&inode->i_lock);
65 /* run all of the dentries associated with this inode. Since this is a 64 /* run all of the dentries associated with this inode. Since this is a
66 * directory, there damn well better only be one item on this list */ 65 * directory, there damn well better only be one item on this list */
67 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 66 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
68 struct dentry *child; 67 struct dentry *child;
69 68
70 /* run all of the children of the original inode and fix their 69 /* run all of the children of the original inode and fix their
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index f31e90fc050d..74825be65b7b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -36,12 +36,11 @@
36static void fsnotify_recalc_inode_mask_locked(struct inode *inode) 36static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
37{ 37{
38 struct fsnotify_mark *mark; 38 struct fsnotify_mark *mark;
39 struct hlist_node *pos;
40 __u32 new_mask = 0; 39 __u32 new_mask = 0;
41 40
42 assert_spin_locked(&inode->i_lock); 41 assert_spin_locked(&inode->i_lock);
43 42
44 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) 43 hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list)
45 new_mask |= mark->mask; 44 new_mask |= mark->mask;
46 inode->i_fsnotify_mask = new_mask; 45 inode->i_fsnotify_mask = new_mask;
47} 46}
@@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
87void fsnotify_clear_marks_by_inode(struct inode *inode) 86void fsnotify_clear_marks_by_inode(struct inode *inode)
88{ 87{
89 struct fsnotify_mark *mark, *lmark; 88 struct fsnotify_mark *mark, *lmark;
90 struct hlist_node *pos, *n; 89 struct hlist_node *n;
91 LIST_HEAD(free_list); 90 LIST_HEAD(free_list);
92 91
93 spin_lock(&inode->i_lock); 92 spin_lock(&inode->i_lock);
94 hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { 93 hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) {
95 list_add(&mark->i.free_i_list, &free_list); 94 list_add(&mark->i.free_i_list, &free_list);
96 hlist_del_init_rcu(&mark->i.i_list); 95 hlist_del_init_rcu(&mark->i.i_list);
97 fsnotify_get_mark(mark); 96 fsnotify_get_mark(mark);
@@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
129 struct inode *inode) 128 struct inode *inode)
130{ 129{
131 struct fsnotify_mark *mark; 130 struct fsnotify_mark *mark;
132 struct hlist_node *pos;
133 131
134 assert_spin_locked(&inode->i_lock); 132 assert_spin_locked(&inode->i_lock);
135 133
136 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { 134 hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) {
137 if (mark->group == group) { 135 if (mark->group == group) {
138 fsnotify_get_mark(mark); 136 fsnotify_get_mark(mark);
139 return mark; 137 return mark;
@@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
194 struct fsnotify_group *group, struct inode *inode, 192 struct fsnotify_group *group, struct inode *inode,
195 int allow_dups) 193 int allow_dups)
196{ 194{
197 struct fsnotify_mark *lmark; 195 struct fsnotify_mark *lmark, *last = NULL;
198 struct hlist_node *node, *last = NULL;
199 int ret = 0; 196 int ret = 0;
200 197
201 mark->flags |= FSNOTIFY_MARK_FLAG_INODE; 198 mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
214 } 211 }
215 212
216 /* should mark be in the middle of the current list? */ 213 /* should mark be in the middle of the current list? */
217 hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { 214 hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) {
218 last = node; 215 last = lmark;
219 216
220 if ((lmark->group == group) && !allow_dups) { 217 if ((lmark->group == group) && !allow_dups) {
221 ret = -EEXIST; 218 ret = -EEXIST;
@@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
235 232
236 BUG_ON(last == NULL); 233 BUG_ON(last == NULL);
237 /* mark should be the last entry. last is the current last entry */ 234 /* mark should be the last entry. last is the current last entry */
238 hlist_add_after_rcu(last, &mark->i.i_list); 235 hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
239out: 236out:
240 fsnotify_recalc_inode_mask_locked(inode); 237 fsnotify_recalc_inode_mask_locked(inode);
241 spin_unlock(&inode->i_lock); 238 spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 871569c7d609..4216308b81b4 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -197,7 +197,6 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
197{ 197{
198 /* ideally the idr is empty and we won't hit the BUG in the callback */ 198 /* ideally the idr is empty and we won't hit the BUG in the callback */
199 idr_for_each(&group->inotify_data.idr, idr_callback, group); 199 idr_for_each(&group->inotify_data.idr, idr_callback, group);
200 idr_remove_all(&group->inotify_data.idr);
201 idr_destroy(&group->inotify_data.idr); 200 idr_destroy(&group->inotify_data.idr);
202 atomic_dec(&group->inotify_data.user->inotify_devs); 201 atomic_dec(&group->inotify_data.user->inotify_devs);
203 free_uid(group->inotify_data.user); 202 free_uid(group->inotify_data.user);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..e0f7c1241a6a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -364,22 +364,20 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
364{ 364{
365 int ret; 365 int ret;
366 366
367 do { 367 idr_preload(GFP_KERNEL);
368 if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) 368 spin_lock(idr_lock);
369 return -ENOMEM;
370 369
371 spin_lock(idr_lock); 370 ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT);
372 ret = idr_get_new_above(idr, i_mark, *last_wd + 1, 371 if (ret >= 0) {
373 &i_mark->wd);
374 /* we added the mark to the idr, take a reference */ 372 /* we added the mark to the idr, take a reference */
375 if (!ret) { 373 i_mark->wd = ret;
376 *last_wd = i_mark->wd; 374 *last_wd = i_mark->wd;
377 fsnotify_get_mark(&i_mark->fsn_mark); 375 fsnotify_get_mark(&i_mark->fsn_mark);
378 } 376 }
379 spin_unlock(idr_lock);
380 } while (ret == -EAGAIN);
381 377
382 return ret; 378 spin_unlock(idr_lock);
379 idr_preload_end();
380 return ret < 0 ? ret : 0;
383} 381}
384 382
385static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, 383static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
@@ -576,8 +574,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
576 574
577 /* don't allow invalid bits: we don't want flags set */ 575 /* don't allow invalid bits: we don't want flags set */
578 mask = inotify_arg_to_mask(arg); 576 mask = inotify_arg_to_mask(arg);
579 if (unlikely(!(mask & IN_ALL_EVENTS)))
580 return -EINVAL;
581 577
582 fsn_mark = fsnotify_find_inode_mark(group, inode); 578 fsn_mark = fsnotify_find_inode_mark(group, inode);
583 if (!fsn_mark) 579 if (!fsn_mark)
@@ -629,8 +625,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
629 625
630 /* don't allow invalid bits: we don't want flags set */ 626 /* don't allow invalid bits: we don't want flags set */
631 mask = inotify_arg_to_mask(arg); 627 mask = inotify_arg_to_mask(arg);
632 if (unlikely(!(mask & IN_ALL_EVENTS)))
633 return -EINVAL;
634 628
635 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 629 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
636 if (unlikely(!tmp_i_mark)) 630 if (unlikely(!tmp_i_mark))
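The inotify_add_to_idr() rewrite above follows the reworked idr API: preallocate with idr_preload() before taking the spinlock, allocate under the lock with idr_alloc() and GFP_NOWAIT, and drop the old idr_pre_get()/idr_get_new_above() retry loop. A minimal sketch of the pattern (the function, lock, and idr here are placeholders, not inotify's):

    #include <linux/idr.h>
    #include <linux/spinlock.h>

    static int alloc_id(struct idr *idr, spinlock_t *lock, void *ptr, int start)
    {
            int id;

            idr_preload(GFP_KERNEL);        /* may sleep, so done outside the lock */
            spin_lock(lock);
            id = idr_alloc(idr, ptr, start, 0, GFP_NOWAIT); /* end == 0: no upper bound */
            spin_unlock(lock);
            idr_preload_end();

            return id;      /* allocated id >= start, or -ENOMEM / -ENOSPC */
    }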
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 4df58b8ea64a..68ca5a8704b5 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -33,12 +33,12 @@
33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) 33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
34{ 34{
35 struct fsnotify_mark *mark, *lmark; 35 struct fsnotify_mark *mark, *lmark;
36 struct hlist_node *pos, *n; 36 struct hlist_node *n;
37 struct mount *m = real_mount(mnt); 37 struct mount *m = real_mount(mnt);
38 LIST_HEAD(free_list); 38 LIST_HEAD(free_list);
39 39
40 spin_lock(&mnt->mnt_root->d_lock); 40 spin_lock(&mnt->mnt_root->d_lock);
41 hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) { 41 hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) {
42 list_add(&mark->m.free_m_list, &free_list); 42 list_add(&mark->m.free_m_list, &free_list);
43 hlist_del_init_rcu(&mark->m.m_list); 43 hlist_del_init_rcu(&mark->m.m_list);
44 fsnotify_get_mark(mark); 44 fsnotify_get_mark(mark);
@@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
71{ 71{
72 struct mount *m = real_mount(mnt); 72 struct mount *m = real_mount(mnt);
73 struct fsnotify_mark *mark; 73 struct fsnotify_mark *mark;
74 struct hlist_node *pos;
75 __u32 new_mask = 0; 74 __u32 new_mask = 0;
76 75
77 assert_spin_locked(&mnt->mnt_root->d_lock); 76 assert_spin_locked(&mnt->mnt_root->d_lock);
78 77
79 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) 78 hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list)
80 new_mask |= mark->mask; 79 new_mask |= mark->mask;
81 m->mnt_fsnotify_mask = new_mask; 80 m->mnt_fsnotify_mask = new_mask;
82} 81}
@@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_
114{ 113{
115 struct mount *m = real_mount(mnt); 114 struct mount *m = real_mount(mnt);
116 struct fsnotify_mark *mark; 115 struct fsnotify_mark *mark;
117 struct hlist_node *pos;
118 116
119 assert_spin_locked(&mnt->mnt_root->d_lock); 117 assert_spin_locked(&mnt->mnt_root->d_lock);
120 118
121 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) { 119 hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) {
122 if (mark->group == group) { 120 if (mark->group == group) {
123 fsnotify_get_mark(mark); 121 fsnotify_get_mark(mark);
124 return mark; 122 return mark;
@@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
153 int allow_dups) 151 int allow_dups)
154{ 152{
155 struct mount *m = real_mount(mnt); 153 struct mount *m = real_mount(mnt);
156 struct fsnotify_mark *lmark; 154 struct fsnotify_mark *lmark, *last = NULL;
157 struct hlist_node *node, *last = NULL;
158 int ret = 0; 155 int ret = 0;
159 156
160 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; 157 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
173 } 170 }
174 171
175 /* should mark be in the middle of the current list? */ 172 /* should mark be in the middle of the current list? */
176 hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) { 173 hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) {
177 last = node; 174 last = lmark;
178 175
179 if ((lmark->group == group) && !allow_dups) { 176 if ((lmark->group == group) && !allow_dups) {
180 ret = -EEXIST; 177 ret = -EEXIST;
@@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
194 191
195 BUG_ON(last == NULL); 192 BUG_ON(last == NULL);
196 /* mark should be the last entry. last is the current last entry */ 193 /* mark should be the last entry. last is the current last entry */
197 hlist_add_after_rcu(last, &mark->m.m_list); 194 hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
198out: 195out:
199 fsnotify_recalc_vfsmount_mask_locked(mnt); 196 fsnotify_recalc_vfsmount_mask_locked(mnt);
200 spin_unlock(&mnt->mnt_root->d_lock); 197 spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 99e36107ff60..aa411c3f20e9 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1101,7 +1101,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1101{ 1101{
1102 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; 1102 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1103 loff_t fpos, i_size; 1103 loff_t fpos, i_size;
1104 struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode; 1104 struct inode *bmp_vi, *vdir = file_inode(filp);
1105 struct super_block *sb = vdir->i_sb; 1105 struct super_block *sb = vdir->i_sb;
1106 ntfs_inode *ndir = NTFS_I(vdir); 1106 ntfs_inode *ndir = NTFS_I(vdir);
1107 ntfs_volume *vol = NTFS_SB(sb); 1107 ntfs_volume *vol = NTFS_SB(sb);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 260b16281fc3..8a404576fb26 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -65,7 +65,20 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
65 65
66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
68 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); 68 switch(acl->a_entries[n].e_tag) {
69 case ACL_USER:
70 acl->a_entries[n].e_uid =
71 make_kuid(&init_user_ns,
72 le32_to_cpu(entry->e_id));
73 break;
74 case ACL_GROUP:
75 acl->a_entries[n].e_gid =
76 make_kgid(&init_user_ns,
77 le32_to_cpu(entry->e_id));
78 break;
79 default:
80 break;
81 }
69 value += sizeof(struct posix_acl_entry); 82 value += sizeof(struct posix_acl_entry);
70 83
71 } 84 }
@@ -91,7 +104,21 @@ static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
91 for (n = 0; n < acl->a_count; n++, entry++) { 104 for (n = 0; n < acl->a_count; n++, entry++) {
92 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 105 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
93 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 106 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
94 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 107 switch(acl->a_entries[n].e_tag) {
108 case ACL_USER:
109 entry->e_id = cpu_to_le32(
110 from_kuid(&init_user_ns,
111 acl->a_entries[n].e_uid));
112 break;
113 case ACL_GROUP:
114 entry->e_id = cpu_to_le32(
115 from_kgid(&init_user_ns,
116 acl->a_entries[n].e_gid));
117 break;
118 default:
119 entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
120 break;
121 }
95 } 122 }
96 return ocfs2_acl; 123 return ocfs2_acl;
97} 124}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 31b9463fba1f..b8a9d87231b1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6751,8 +6751,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
6751 mlog_errno(ret); 6751 mlog_errno(ret);
6752 6752
6753out: 6753out:
6754 if (pages) 6754 kfree(pages);
6755 kfree(pages);
6756 6755
6757 return ret; 6756 return ret;
6758} 6757}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..20dfec72e903 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -569,7 +569,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
569 int ret, 569 int ret,
570 bool is_async) 570 bool is_async)
571{ 571{
572 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 572 struct inode *inode = file_inode(iocb->ki_filp);
573 int level; 573 int level;
574 wait_queue_head_t *wq = ocfs2_ioend_wq(inode); 574 wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
575 575
@@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
593 level = ocfs2_iocb_rw_locked_level(iocb); 593 level = ocfs2_iocb_rw_locked_level(iocb);
594 ocfs2_rw_unlock(inode, level); 594 ocfs2_rw_unlock(inode, level);
595 595
596 inode_dio_done(inode);
596 if (is_async) 597 if (is_async)
597 aio_complete(iocb, ret, 0); 598 aio_complete(iocb, ret, 0);
598 inode_dio_done(inode);
599} 599}
600 600
601/* 601/*
@@ -626,7 +626,7 @@ static ssize_t ocfs2_direct_IO(int rw,
626 unsigned long nr_segs) 626 unsigned long nr_segs)
627{ 627{
628 struct file *file = iocb->ki_filp; 628 struct file *file = iocb->ki_filp;
629 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 629 struct inode *inode = file_inode(file)->i_mapping->host;
630 630
631 /* 631 /*
632 * Fallback to buffered I/O if we see an inode without 632 * Fallback to buffered I/O if we see an inode without
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 } 1196 }
1197 wait_for_stable_page(wc->w_pages[i]);
1197 1198
1198 if (index == target_index) 1199 if (index == target_index)
1199 wc->w_target_page = wc->w_pages[i]; 1200 wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f7c648d7d6bf..42252bf64b51 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1471,8 +1471,7 @@ static void o2hb_region_release(struct config_item *item)
1471 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); 1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473 1473
1474 if (reg->hr_tmp_block) 1474 kfree(reg->hr_tmp_block);
1475 kfree(reg->hr_tmp_block);
1476 1475
1477 if (reg->hr_slot_data) { 1476 if (reg->hr_slot_data) {
1478 for (i = 0; i < reg->hr_num_pages; i++) { 1477 for (i = 0; i < reg->hr_num_pages; i++) {
@@ -1486,8 +1485,7 @@ static void o2hb_region_release(struct config_item *item)
1486 if (reg->hr_bdev) 1485 if (reg->hr_bdev)
1487 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1486 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1488 1487
1489 if (reg->hr_slots) 1488 kfree(reg->hr_slots);
1490 kfree(reg->hr_slots);
1491 1489
1492 kfree(reg->hr_db_regnum); 1490 kfree(reg->hr_db_regnum);
1493 kfree(reg->hr_db_livenodes); 1491 kfree(reg->hr_db_livenodes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..aa88bd8bcedc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -304,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn)
304 304
305static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) 305static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw)
306{ 306{
307 int ret = 0; 307 int ret;
308
309 do {
310 if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) {
311 ret = -EAGAIN;
312 break;
313 }
314 spin_lock(&nn->nn_lock);
315 ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id);
316 if (ret == 0)
317 list_add_tail(&nsw->ns_node_item,
318 &nn->nn_status_list);
319 spin_unlock(&nn->nn_lock);
320 } while (ret == -EAGAIN);
321 308
322 if (ret == 0) { 309 spin_lock(&nn->nn_lock);
323 init_waitqueue_head(&nsw->ns_wq); 310 ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC);
324 nsw->ns_sys_status = O2NET_ERR_NONE; 311 if (ret >= 0) {
325 nsw->ns_status = 0; 312 nsw->ns_id = ret;
313 list_add_tail(&nsw->ns_node_item, &nn->nn_status_list);
326 } 314 }
315 spin_unlock(&nn->nn_lock);
316 if (ret < 0)
317 return ret;
327 318
328 return ret; 319 init_waitqueue_head(&nsw->ns_wq);
320 nsw->ns_sys_status = O2NET_ERR_NONE;
321 nsw->ns_status = 0;
322 return 0;
329} 323}
330 324
331static void o2net_complete_nsw_locked(struct o2net_node *nn, 325static void o2net_complete_nsw_locked(struct o2net_node *nn,
@@ -870,7 +864,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
870 /* we've had some trouble with handlers seemingly vanishing. */ 864 /* we've had some trouble with handlers seemingly vanishing. */
871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 865 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
872 &parent) == NULL, 866 &parent) == NULL,
873 "couldn't find handler we *just* registerd " 867 "couldn't find handler we *just* registered "
874 "for type %u key %08x\n", msg_type, key); 868 "for type %u key %08x\n", msg_type, key);
875 } 869 }
876 write_unlock(&o2net_handler_lock); 870 write_unlock(&o2net_handler_lock);
@@ -1165,10 +1159,8 @@ out:
1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 1159 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1166 if (sc) 1160 if (sc)
1167 sc_put(sc); 1161 sc_put(sc);
1168 if (vec) 1162 kfree(vec);
1169 kfree(vec); 1163 kfree(msg);
1170 if (msg)
1171 kfree(msg);
1172 o2net_complete_nsw(nn, &nsw, 0, 0, 0); 1164 o2net_complete_nsw(nn, &nsw, 0, 0, 0);
1173 return ret; 1165 return ret;
1174} 1166}
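By contrast with the inotify conversion above, o2net_prep_nsw() allocates its id entirely under nn_lock, so it passes GFP_ATOMIC directly to idr_alloc() and skips the idr_preload()/idr_preload_end() pair; the cost is that the allocation can fail under memory pressure where the preloaded variant would have slept. Roughly:

            spin_lock(&nn->nn_lock);
            ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC);    /* no preload */
            if (ret >= 0)
                    nsw->ns_id = ret;
            spin_unlock(&nn->nn_lock);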
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 8db4b58b2e4b..ef999729e274 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
169 u64 parent_blkno, 169 u64 parent_blkno,
170 int skip_unhashed) 170 int skip_unhashed)
171{ 171{
172 struct hlist_node *p;
173 struct dentry *dentry; 172 struct dentry *dentry;
174 173
175 spin_lock(&inode->i_lock); 174 spin_lock(&inode->i_lock);
176 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 175 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
177 spin_lock(&dentry->d_lock); 176 spin_lock(&dentry->d_lock);
178 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
179 trace_ocfs2_find_local_alias(dentry->d_name.len, 178 trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8fe4e2892ab9..f1e1aed8f638 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -67,7 +67,6 @@
67#define NAMEI_RA_CHUNKS 2 67#define NAMEI_RA_CHUNKS 2
68#define NAMEI_RA_BLOCKS 4 68#define NAMEI_RA_BLOCKS 4
69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
71 70
72static unsigned char ocfs2_filetype_table[] = { 71static unsigned char ocfs2_filetype_table[] = {
73 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 72 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -2015,12 +2014,12 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
2015int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 2014int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2016{ 2015{
2017 int error = 0; 2016 int error = 0;
2018 struct inode *inode = filp->f_path.dentry->d_inode; 2017 struct inode *inode = file_inode(filp);
2019 int lock_level = 0; 2018 int lock_level = 0;
2020 2019
2021 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); 2020 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
2022 2021
2023 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2022 error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
2024 if (lock_level && error >= 0) { 2023 if (lock_level && error >= 0) {
2025 /* We release EX lock which used to update atime 2024 /* We release EX lock which used to update atime
2026 * and get PR lock again to reduce contention 2025 * and get PR lock again to reduce contention
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9e89d70df337..dbb17c07656a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -319,9 +319,7 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
319 if (dlm->master_hash) 319 if (dlm->master_hash)
320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
321 321
322 if (dlm->name) 322 kfree(dlm->name);
323 kfree(dlm->name);
324
325 kfree(dlm); 323 kfree(dlm);
326} 324}
327 325
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 005261c333b0..33ecbe0e6734 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2020,7 +2020,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
2020 int ignore_higher, u8 request_from, u32 flags) 2020 int ignore_higher, u8 request_from, u32 flags)
2021{ 2021{
2022 struct dlm_work_item *item; 2022 struct dlm_work_item *item;
2023 item = kzalloc(sizeof(*item), GFP_NOFS); 2023 item = kzalloc(sizeof(*item), GFP_ATOMIC);
2024 if (!item) 2024 if (!item)
2025 return -ENOMEM; 2025 return -ENOMEM;
2026 2026
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 01ebfd0bdad7..eeac97bb3bfa 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2083 u8 dead_node, u8 new_master) 2083 u8 dead_node, u8 new_master)
2084{ 2084{
2085 int i; 2085 int i;
2086 struct hlist_node *hash_iter;
2087 struct hlist_head *bucket; 2086 struct hlist_head *bucket;
2088 struct dlm_lock_resource *res, *next; 2087 struct dlm_lock_resource *res, *next;
2089 2088
@@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2114 * if necessary */ 2113 * if necessary */
2115 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2114 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
2116 bucket = dlm_lockres_hash(dlm, i); 2115 bucket = dlm_lockres_hash(dlm, i);
2117 hlist_for_each_entry(res, hash_iter, bucket, hash_node) { 2116 hlist_for_each_entry(res, bucket, hash_node) {
2118 if (!(res->state & DLM_LOCK_RES_RECOVERING)) 2117 if (!(res->state & DLM_LOCK_RES_RECOVERING))
2119 continue; 2118 continue;
2120 2119
@@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2273 2272
2274static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) 2273static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2275{ 2274{
2276 struct hlist_node *iter;
2277 struct dlm_lock_resource *res; 2275 struct dlm_lock_resource *res;
2278 int i; 2276 int i;
2279 struct hlist_head *bucket; 2277 struct hlist_head *bucket;
@@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2299 */ 2297 */
2300 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2298 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
2301 bucket = dlm_lockres_hash(dlm, i); 2299 bucket = dlm_lockres_hash(dlm, i);
2302 hlist_for_each_entry(res, iter, bucket, hash_node) { 2300 hlist_for_each_entry(res, bucket, hash_node) {
2303 /* always prune any $RECOVERY entries for dead nodes, 2301 /* always prune any $RECOVERY entries for dead nodes,
2304 * otherwise hangs can occur during later recovery */ 2302 * otherwise hangs can occur during later recovery */
2305 if (dlm_is_recovery_lock(res->lockname.name, 2303 if (dlm_is_recovery_lock(res->lockname.name,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 16b712d260d4..4c5fc8d77dc2 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -224,7 +224,7 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) 224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
225{ 225{
226 int event = 0; 226 int event = 0;
227 struct inode *inode = file->f_path.dentry->d_inode; 227 struct inode *inode = file_inode(file);
228 struct dlmfs_inode_private *ip = DLMFS_I(inode); 228 struct dlmfs_inode_private *ip = DLMFS_I(inode);
229 229
230 poll_wait(file, &ip->ip_lockres.l_event, wait); 230 poll_wait(file, &ip->ip_lockres.l_event, wait);
@@ -245,7 +245,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
245 int bytes_left; 245 int bytes_left;
246 ssize_t readlen, got; 246 ssize_t readlen, got;
247 char *lvb_buf; 247 char *lvb_buf;
248 struct inode *inode = filp->f_path.dentry->d_inode; 248 struct inode *inode = file_inode(filp);
249 249
250 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 250 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
251 inode->i_ino, count, *ppos); 251 inode->i_ino, count, *ppos);
@@ -293,7 +293,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
293 int bytes_left; 293 int bytes_left;
294 ssize_t writelen; 294 ssize_t writelen;
295 char *lvb_buf; 295 char *lvb_buf;
296 struct inode *inode = filp->f_path.dentry->d_inode; 296 struct inode *inode = file_inode(filp);
297 297
298 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 298 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
299 inode->i_ino, count, *ppos); 299 inode->i_ino, count, *ppos);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4f7795fb5fc0..12ae194ac943 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2045,8 +2045,8 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2045 lvb->lvb_version = OCFS2_LVB_VERSION; 2045 lvb->lvb_version = OCFS2_LVB_VERSION;
2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
2048 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 2048 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
2049 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 2049 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
2052 lvb->lvb_iatime_packed = 2052 lvb->lvb_iatime_packed =
@@ -2095,8 +2095,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2095 else 2095 else
2096 inode->i_blocks = ocfs2_inode_sector_count(inode); 2096 inode->i_blocks = ocfs2_inode_sector_count(inode);
2097 2097
2098 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2098 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
2099 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2099 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2102 ocfs2_unpack_timespec(&inode->i_atime, 2102 ocfs2_unpack_timespec(&inode->i_atime,
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2545 * everything is up to the caller :) */ 2545 * everything is up to the caller :) */
2546 status = ocfs2_should_refresh_lock_res(lockres); 2546 status = ocfs2_should_refresh_lock_res(lockres);
2547 if (status < 0) { 2547 if (status < 0) {
2548 ocfs2_cluster_unlock(osb, lockres, level);
2548 mlog_errno(status); 2549 mlog_errno(status);
2549 goto bail; 2550 goto bail;
2550 } 2551 }
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2553 2554
2554 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2555 2556
2556 if (status < 0) 2557 if (status < 0) {
2558 ocfs2_cluster_unlock(osb, lockres, level);
2557 mlog_errno(status); 2559 mlog_errno(status);
2560 }
2558 ocfs2_track_lock_refresh(lockres); 2561 ocfs2_track_lock_refresh(lockres);
2559 } 2562 }
2560bail: 2563bail:
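The __ocfs2_stuff_meta_lvb()/ocfs2_refresh_inode_from_lvb() hunks above use the i_uid_read()/i_uid_write() accessors (and their gid counterparts), which translate between the inode's kuid_t/kgid_t and a raw u32 id relative to init_user_ns, as needed for on-disk and lock-value-block encodings. A minimal sketch:

    #include <linux/fs.h>

            /* inode -> raw id, e.g. for an on-disk or lock-value-block field */
            u32 raw_uid = i_uid_read(inode);

            /* raw id -> inode */
            i_uid_write(inode, raw_uid);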
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 322216a5f0dd..29651167190d 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -195,11 +195,11 @@ static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len,
195 195
196 if (parent && (len < 6)) { 196 if (parent && (len < 6)) {
197 *max_len = 6; 197 *max_len = 6;
198 type = 255; 198 type = FILEID_INVALID;
199 goto bail; 199 goto bail;
200 } else if (len < 3) { 200 } else if (len < 3) {
201 *max_len = 3; 201 *max_len = 3;
202 type = 255; 202 type = FILEID_INVALID;
203 goto bail; 203 goto bail;
204 } 204 }
205 205
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f487aa343442..1c39efb71bab 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -282,8 +282,7 @@ search:
282 spin_unlock(&oi->ip_lock); 282 spin_unlock(&oi->ip_lock);
283 283
284out: 284out:
285 if (new_emi) 285 kfree(new_emi);
286 kfree(new_emi);
287} 286}
288 287
289static int ocfs2_last_eb_is_empty(struct inode *inode, 288static int ocfs2_last_eb_is_empty(struct inode *inode,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 37d313ede159..6474cb44004d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1116,7 +1116,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1116 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1116 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1117 dentry->d_name.len, dentry->d_name.name, 1117 dentry->d_name.len, dentry->d_name.name,
1118 attr->ia_valid, attr->ia_mode, 1118 attr->ia_valid, attr->ia_mode,
1119 attr->ia_uid, attr->ia_gid); 1119 from_kuid(&init_user_ns, attr->ia_uid),
1120 from_kgid(&init_user_ns, attr->ia_gid));
1120 1121
1121 /* ensuring we don't even attempt to truncate a symlink */ 1122 /* ensuring we don't even attempt to truncate a symlink */
1122 if (S_ISLNK(inode->i_mode)) 1123 if (S_ISLNK(inode->i_mode))
@@ -1174,14 +1175,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1174 } 1175 }
1175 } 1176 }
1176 1177
1177 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 1178 if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
1178 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 1179 (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
1179 /* 1180 /*
1180 * Gather pointers to quota structures so that allocation / 1181 * Gather pointers to quota structures so that allocation /
1181 * freeing of quota structures happens here and not inside 1182 * freeing of quota structures happens here and not inside
1182 * dquot_transfer() where we have problems with lock ordering 1183 * dquot_transfer() where we have problems with lock ordering
1183 */ 1184 */
1184 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 1185 if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
1185 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1186 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1186 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1187 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1187 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); 1188 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
@@ -1190,7 +1191,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1190 goto bail_unlock; 1191 goto bail_unlock;
1191 } 1192 }
1192 } 1193 }
1193 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid 1194 if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
1194 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1195 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1195 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1196 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1196 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); 1197 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
@@ -1949,7 +1950,7 @@ out:
1949int ocfs2_change_file_space(struct file *file, unsigned int cmd, 1950int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1950 struct ocfs2_space_resv *sr) 1951 struct ocfs2_space_resv *sr)
1951{ 1952{
1952 struct inode *inode = file->f_path.dentry->d_inode; 1953 struct inode *inode = file_inode(file);
1953 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1954 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1954 int ret; 1955 int ret;
1955 1956
@@ -1977,7 +1978,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1977static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, 1978static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
1978 loff_t len) 1979 loff_t len)
1979{ 1980{
1980 struct inode *inode = file->f_path.dentry->d_inode; 1981 struct inode *inode = file_inode(file);
1981 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1982 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1982 struct ocfs2_space_resv sr; 1983 struct ocfs2_space_resv sr;
1983 int change_size = 1; 1984 int change_size = 1;
@@ -2232,7 +2233,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2232 loff_t old_size, *ppos = &iocb->ki_pos; 2233 loff_t old_size, *ppos = &iocb->ki_pos;
2233 u32 old_clusters; 2234 u32 old_clusters;
2234 struct file *file = iocb->ki_filp; 2235 struct file *file = iocb->ki_filp;
2235 struct inode *inode = file->f_path.dentry->d_inode; 2236 struct inode *inode = file_inode(file);
2236 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2237 int full_coherency = !(osb->s_mount_opt & 2238 int full_coherency = !(osb->s_mount_opt &
2238 OCFS2_MOUNT_COHERENCY_BUFFERED); 2239 OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2516,7 +2517,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2516 unsigned int flags) 2517 unsigned int flags)
2517{ 2518{
2518 int ret = 0, lock_level = 0; 2519 int ret = 0, lock_level = 0;
2519 struct inode *inode = in->f_path.dentry->d_inode; 2520 struct inode *inode = file_inode(in);
2520 2521
2521 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, 2522 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
2522 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2523 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2526,7 +2527,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2526 /* 2527 /*
2527 * See the comment in ocfs2_file_aio_read() 2528 * See the comment in ocfs2_file_aio_read()
2528 */ 2529 */
2529 ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); 2530 ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
2530 if (ret < 0) { 2531 if (ret < 0) {
2531 mlog_errno(ret); 2532 mlog_errno(ret);
2532 goto bail; 2533 goto bail;
@@ -2546,7 +2547,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2546{ 2547{
2547 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; 2548 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
2548 struct file *filp = iocb->ki_filp; 2549 struct file *filp = iocb->ki_filp;
2549 struct inode *inode = filp->f_path.dentry->d_inode; 2550 struct inode *inode = file_inode(filp);
2550 2551
2551 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, 2552 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
2552 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2553 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2589,7 +2590,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2589 * like i_size. This allows the checks down below 2590 * like i_size. This allows the checks down below
2590 * generic_file_aio_read() a chance of actually working. 2591 * generic_file_aio_read() a chance of actually working.
2591 */ 2592 */
2592 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2593 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
2593 if (ret < 0) { 2594 if (ret < 0) {
2594 mlog_errno(ret); 2595 mlog_errno(ret);
2595 goto bail; 2596 goto bail;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d89e08a81eda..f87f9bd1edff 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -269,8 +269,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
269 inode->i_generation = le32_to_cpu(fe->i_generation); 269 inode->i_generation = le32_to_cpu(fe->i_generation);
270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
271 inode->i_mode = le16_to_cpu(fe->i_mode); 271 inode->i_mode = le16_to_cpu(fe->i_mode);
272 inode->i_uid = le32_to_cpu(fe->i_uid); 272 i_uid_write(inode, le32_to_cpu(fe->i_uid));
273 inode->i_gid = le32_to_cpu(fe->i_gid); 273 i_gid_write(inode, le32_to_cpu(fe->i_gid));
274 274
275 /* Fast symlinks will have i_size but no allocated clusters. */ 275 /* Fast symlinks will have i_size but no allocated clusters. */
276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { 276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
@@ -1259,8 +1259,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1259 1259
1260 fe->i_size = cpu_to_le64(i_size_read(inode)); 1260 fe->i_size = cpu_to_le64(i_size_read(inode));
1261 ocfs2_set_links_count(fe, inode->i_nlink); 1261 ocfs2_set_links_count(fe, inode->i_nlink);
1262 fe->i_uid = cpu_to_le32(inode->i_uid); 1262 fe->i_uid = cpu_to_le32(i_uid_read(inode));
1263 fe->i_gid = cpu_to_le32(inode->i_gid); 1263 fe->i_gid = cpu_to_le32(i_gid_read(inode));
1264 fe->i_mode = cpu_to_le16(inode->i_mode); 1264 fe->i_mode = cpu_to_le16(inode->i_mode);
1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
@@ -1290,8 +1290,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 ocfs2_set_inode_flags(inode); 1290 ocfs2_set_inode_flags(inode);
1291 i_size_write(inode, le64_to_cpu(fe->i_size)); 1291 i_size_write(inode, le64_to_cpu(fe->i_size));
1292 set_nlink(inode, ocfs2_read_links_count(fe)); 1292 set_nlink(inode, ocfs2_read_links_count(fe));
1293 inode->i_uid = le32_to_cpu(fe->i_uid); 1293 i_uid_write(inode, le32_to_cpu(fe->i_uid));
1294 inode->i_gid = le32_to_cpu(fe->i_gid); 1294 i_gid_write(inode, le32_to_cpu(fe->i_gid));
1295 inode->i_mode = le16_to_cpu(fe->i_mode); 1295 inode->i_mode = le16_to_cpu(fe->i_mode);
1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1297 inode->i_blocks = 0; 1297 inode->i_blocks = 0;
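
These inode.c hunks stop assigning raw little-endian owner values straight into inode->i_uid/i_gid and go through i_uid_write()/i_gid_write() and i_uid_read()/i_gid_read() instead, since the in-core fields are now the namespaced kuid_t/kgid_t types. A minimal sketch of the two directions of the conversion; the helper names are real, the wrapper functions below are illustrative only:

#include <linux/fs.h>

/* raw on-disk id -> namespaced in-core id (ocfs2_populate_inode, ocfs2_refresh_inode) */
static void example_load_owner(struct inode *inode, u32 disk_uid, u32 disk_gid)
{
        i_uid_write(inode, disk_uid);
        i_gid_write(inode, disk_gid);
}

/* namespaced in-core id -> raw on-disk id (ocfs2_mark_inode_dirty, __ocfs2_mknod_locked) */
static void example_store_owner(__le32 *disk_uid, __le32 *disk_gid, struct inode *inode)
{
        *disk_uid = cpu_to_le32(i_uid_read(inode));
        *disk_gid = cpu_to_le32(i_gid_read(inode));
}
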
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f20edcbfe700..752f0b26221d 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -881,7 +881,7 @@ bail:
881 881
882long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 882long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
883{ 883{
884 struct inode *inode = filp->f_path.dentry->d_inode; 884 struct inode *inode = file_inode(filp);
885 unsigned int flags; 885 unsigned int flags;
886 int new_clusters; 886 int new_clusters;
887 int status; 887 int status;
@@ -994,7 +994,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
994{ 994{
995 bool preserve; 995 bool preserve;
996 struct reflink_arguments args; 996 struct reflink_arguments args;
997 struct inode *inode = file->f_path.dentry->d_inode; 997 struct inode *inode = file_inode(file);
998 struct ocfs2_info info; 998 struct ocfs2_info info;
999 void __user *argp = (void __user *)arg; 999 void __user *argp = (void __user *)arg;
1000 1000
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2dd36af79e26..8eccfabcd12e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1234,11 +1234,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1234 /* Though we wish to avoid it, we are in fact safe in 1234 /* Though we wish to avoid it, we are in fact safe in
1235 * skipping local alloc cleanup as fsck.ocfs2 is more 1235 * skipping local alloc cleanup as fsck.ocfs2 is more
1236 * than capable of reclaiming unused space. */ 1236 * than capable of reclaiming unused space. */
1237 if (la_dinode) 1237 kfree(la_dinode);
1238 kfree(la_dinode); 1238 kfree(tl_dinode);
1239
1240 if (tl_dinode)
1241 kfree(tl_dinode);
1242 1239
1243 if (qrec) 1240 if (qrec)
1244 ocfs2_free_quota_recovery(qrec); 1241 ocfs2_free_quota_recovery(qrec);
@@ -1408,8 +1405,7 @@ bail:
1408 1405
1409 mutex_unlock(&osb->recovery_lock); 1406 mutex_unlock(&osb->recovery_lock);
1410 1407
1411 if (rm_quota) 1408 kfree(rm_quota);
1412 kfree(rm_quota);
1413 1409
1414 /* no one is callint kthread_stop() for us so the kthread() api 1410 /* no one is callint kthread_stop() for us so the kthread() api
1415 * requires that we call do_exit(). And it isn't exported, but 1411 * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index a9f78c74d687..aebeacd807c3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -476,8 +476,7 @@ out:
476 if (local_alloc_inode) 476 if (local_alloc_inode)
477 iput(local_alloc_inode); 477 iput(local_alloc_inode);
478 478
479 if (alloc_copy) 479 kfree(alloc_copy);
480 kfree(alloc_copy);
481} 480}
482 481
483/* 482/*
@@ -534,7 +533,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
534 mlog_errno(status); 533 mlog_errno(status);
535 534
536bail: 535bail:
537 if ((status < 0) && (*alloc_copy)) { 536 if (status < 0) {
538 kfree(*alloc_copy); 537 kfree(*alloc_copy);
539 *alloc_copy = NULL; 538 *alloc_copy = NULL;
540 } 539 }
@@ -1290,8 +1289,7 @@ bail:
1290 if (main_bm_inode) 1289 if (main_bm_inode)
1291 iput(main_bm_inode); 1290 iput(main_bm_inode);
1292 1291
1293 if (alloc_copy) 1292 kfree(alloc_copy);
1294 kfree(alloc_copy);
1295 1293
1296 if (ac) 1294 if (ac)
1297 ocfs2_free_alloc_context(ac); 1295 ocfs2_free_alloc_context(ac);
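
The journal.c and localalloc.c cleanups above rely on kfree() being a no-op for a NULL pointer, which makes the old "if (ptr) kfree(ptr);" guards redundant. A small sketch of the resulting error-path style, with hypothetical names:

#include <linux/slab.h>
#include <linux/errno.h>

static int example_use_two_buffers(void)
{
        char *a, *b = NULL;
        int ret = -ENOMEM;

        a = kmalloc(32, GFP_KERNEL);
        if (!a)
                goto out;
        b = kmalloc(64, GFP_KERNEL);
        if (!b)
                goto out;

        ret = 0;        /* ... work with a and b ... */
out:
        /* kfree(NULL) is a no-op, so neither pointer needs an if () guard */
        kfree(b);
        kfree(a);
        return ret;
}
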
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 47a87dda54ce..10d66c75cecb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -62,7 +62,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
62 struct page *page) 62 struct page *page)
63{ 63{
64 int ret = VM_FAULT_NOPAGE; 64 int ret = VM_FAULT_NOPAGE;
65 struct inode *inode = file->f_path.dentry->d_inode; 65 struct inode *inode = file_inode(file);
66 struct address_space *mapping = inode->i_mapping; 66 struct address_space *mapping = inode->i_mapping;
67 loff_t pos = page_offset(page); 67 loff_t pos = page_offset(page);
68 unsigned int len = PAGE_CACHE_SIZE; 68 unsigned int len = PAGE_CACHE_SIZE;
@@ -131,7 +131,7 @@ out:
131static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 131static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
132{ 132{
133 struct page *page = vmf->page; 133 struct page *page = vmf->page;
134 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 134 struct inode *inode = file_inode(vma->vm_file);
135 struct buffer_head *di_bh = NULL; 135 struct buffer_head *di_bh = NULL;
136 sigset_t oldset; 136 sigset_t oldset;
137 int ret; 137 int ret;
@@ -180,13 +180,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
180{ 180{
181 int ret = 0, lock_level = 0; 181 int ret = 0, lock_level = 0;
182 182
183 ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, 183 ret = ocfs2_inode_lock_atime(file_inode(file),
184 file->f_vfsmnt, &lock_level); 184 file->f_path.mnt, &lock_level);
185 if (ret < 0) { 185 if (ret < 0) {
186 mlog_errno(ret); 186 mlog_errno(ret);
187 goto out; 187 goto out;
188 } 188 }
189 ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); 189 ocfs2_inode_unlock(file_inode(file), lock_level);
190out: 190out:
191 vma->vm_ops = &ocfs2_file_vm_ops; 191 vma->vm_ops = &ocfs2_file_vm_ops;
192 return 0; 192 return 0;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 6083432f667e..9f8dcadd9a50 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1055,7 +1055,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1055{ 1055{
1056 int status; 1056 int status;
1057 1057
1058 struct inode *inode = filp->f_path.dentry->d_inode; 1058 struct inode *inode = file_inode(filp);
1059 struct ocfs2_move_extents range; 1059 struct ocfs2_move_extents range;
1060 struct ocfs2_move_extents_context *context = NULL; 1060 struct ocfs2_move_extents_context *context = NULL;
1061 1061
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f1fd0741162b..04ee1b57c243 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -512,8 +512,8 @@ static int __ocfs2_mknod_locked(struct inode *dir,
512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc); 512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
515 fe->i_uid = cpu_to_le32(inode->i_uid); 515 fe->i_uid = cpu_to_le32(i_uid_read(inode));
516 fe->i_gid = cpu_to_le32(inode->i_gid); 516 fe->i_gid = cpu_to_le32(i_gid_read(inode));
517 fe->i_mode = cpu_to_le16(inode->i_mode); 517 fe->i_mode = cpu_to_le16(inode->i_mode);
518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); 519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30a055049e16..998b17eda09d 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2927,7 +2927,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2927 u32 new_cluster, u32 new_len) 2927 u32 new_cluster, u32 new_len)
2928{ 2928{
2929 int ret = 0, partial; 2929 int ret = 0, partial;
2930 struct inode *inode = file->f_path.dentry->d_inode; 2930 struct inode *inode = file_inode(file);
2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
@@ -3020,7 +3020,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3020 u32 new_cluster, u32 new_len) 3020 u32 new_cluster, u32 new_len)
3021{ 3021{
3022 int ret = 0; 3022 int ret = 0;
3023 struct inode *inode = file->f_path.dentry->d_inode; 3023 struct inode *inode = file_inode(file);
3024 struct super_block *sb = inode->i_sb; 3024 struct super_block *sb = inode->i_sb;
3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -4407,7 +4407,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4407 * rights to do so. 4407 * rights to do so.
4408 */ 4408 */
4409 if (preserve) { 4409 if (preserve) {
4410 if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN)) 4410 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
4411 return -EPERM; 4411 return -EPERM;
4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) 4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
4413 return -EPERM; 4413 return -EPERM;
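
ocfs2_vfs_reflink() can no longer compare current_fsuid() with inode->i_uid using "!=", because both are now the opaque kuid_t type; the patch switches to uid_eq(). A sketch of the same ownership-or-capability check, wrapped in a helper name of my own:

#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/capability.h>

/* hypothetical helper: may the caller preserve ownership on a reflink? */
static int example_may_preserve_owner(struct inode *inode)
{
        if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
                return -EPERM;
        if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
                return -EPERM;
        return 0;
}
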
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 94368017edb3..bf1f8930456f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -376,7 +376,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
377 377
378out_free: 378out_free:
379 if (rc && conn->cc_private) 379 if (rc)
380 kfree(conn->cc_private); 380 kfree(conn->cc_private);
381 381
382out: 382out:
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f169da4624fd..b7e74b580c0f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
642 * cluster groups will be staying in cache for the duration of 642 * cluster groups will be staying in cache for the duration of
643 * this operation. 643 * this operation.
644 */ 644 */
645 ac->ac_allow_chain_relink = 0; 645 ac->ac_disable_chain_relink = 1;
646 646
647 /* Claim the first region */ 647 /* Claim the first region */
648 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, 648 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
@@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1823 * Do this *after* figuring out how many bits we're taking out 1823 * Do this *after* figuring out how many bits we're taking out
1824 * of our target group. 1824 * of our target group.
1825 */ 1825 */
1826 if (ac->ac_allow_chain_relink && 1826 if (!ac->ac_disable_chain_relink &&
1827 (prev_group_bh) && 1827 (prev_group_bh) &&
1828 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { 1828 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
1829 status = ocfs2_relink_block_group(handle, alloc_inode, 1829 status = ocfs2_relink_block_group(handle, alloc_inode,
@@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1928 1928
1929 victim = ocfs2_find_victim_chain(cl); 1929 victim = ocfs2_find_victim_chain(cl);
1930 ac->ac_chain = victim; 1930 ac->ac_chain = victim;
1931 ac->ac_allow_chain_relink = 1;
1932 1931
1933 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1932 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1934 res, &bits_left); 1933 res, &bits_left);
@@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1947 * searching each chain in order. Don't allow chain relinking 1946 * searching each chain in order. Don't allow chain relinking
1948 * because we only calculate enough journal credits for one 1947 * because we only calculate enough journal credits for one
1949 * relink per alloc. */ 1948 * relink per alloc. */
1950 ac->ac_allow_chain_relink = 0; 1949 ac->ac_disable_chain_relink = 1;
1951 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1950 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1952 if (i == victim) 1951 if (i == victim)
1953 continue; 1952 continue;
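
The suballoc.c/suballoc.h change flips the sense of the relink flag: rather than requiring callers to set ac_allow_chain_relink = 1, the context carries ac_disable_chain_relink, so a freshly zeroed ocfs2_alloc_context defaults to "relinking allowed" and only the paths that cannot afford the extra journal credits set the flag. A generic sketch of the idea, with made-up names:

#include <linux/slab.h>

struct example_alloc_ctx {
        /* zero (the kzalloc default) means the optimisation stays enabled */
        int disable_chain_relink;
};

static struct example_alloc_ctx *example_ctx_new(gfp_t gfp)
{
        /* no explicit "allow = 1" needed at any call site */
        return kzalloc(sizeof(struct example_alloc_ctx), gfp);
}
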
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b8afabfeede4..a36d0aa50911 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,7 +49,7 @@ struct ocfs2_alloc_context {
49 49
50 /* these are used by the chain search */ 50 /* these are used by the chain search */
51 u16 ac_chain; 51 u16 ac_chain;
52 int ac_allow_chain_relink; 52 int ac_disable_chain_relink;
53 group_search_t *ac_group_search; 53 group_search_t *ac_group_search;
54 54
55 u64 ac_last_group; 55 u64 ac_last_group;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0e91ec22a940..9b6910dec4ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2525,8 +2525,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2525 mlog_errno(status); 2525 mlog_errno(status);
2526 2526
2527finally: 2527finally:
2528 if (local_alloc) 2528 kfree(local_alloc);
2529 kfree(local_alloc);
2530 2529
2531 if (status) 2530 if (status)
2532 mlog_errno(status); 2531 mlog_errno(status);
@@ -2553,8 +2552,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2553 * we free it here. 2552 * we free it here.
2554 */ 2553 */
2555 kfree(osb->journal); 2554 kfree(osb->journal);
2556 if (osb->local_alloc_copy) 2555 kfree(osb->local_alloc_copy);
2557 kfree(osb->local_alloc_copy);
2558 kfree(osb->uuid_str); 2556 kfree(osb->uuid_str);
2559 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2557 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2560 memset(osb, 0, sizeof(struct ocfs2_super)); 2558 memset(osb, 0, sizeof(struct ocfs2_super));
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index f1fbb4b552ad..66edce7ecfd7 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -57,7 +57,7 @@
57static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) 57static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page)
58{ 58{
59 struct inode *inode = page->mapping->host; 59 struct inode *inode = page->mapping->host;
60 struct buffer_head *bh; 60 struct buffer_head *bh = NULL;
61 int status = ocfs2_read_inode_block(inode, &bh); 61 int status = ocfs2_read_inode_block(inode, &bh);
62 struct ocfs2_dinode *fe; 62 struct ocfs2_dinode *fe;
63 const char *link; 63 const char *link;
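
The symlink.c one-liner initialises the buffer_head pointer before &bh is handed to ocfs2_read_inode_block(); the read path treats a non-NULL *bh as a caller-supplied buffer, so an uninitialised stack pointer is unsafe there. A sketch of the out-parameter convention, using a hypothetical reader:

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/errno.h>

/* hypothetical reader following the ocfs2 convention: if *bh is NULL it is
 * allocated here, otherwise the caller's buffer is reused */
static int example_read_block(struct inode *inode, struct buffer_head **bh);

static int example_caller(struct inode *inode)
{
        struct buffer_head *bh = NULL;  /* must not be stack garbage */
        int status = example_read_block(inode, &bh);

        if (status)
                return status;
        /* ... use bh ... */
        brelse(bh);
        return 0;
}
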
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 3d635f4bbb20..f053688d22a3 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -91,8 +91,7 @@ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
91 } else 91 } else
92 osb->local_system_inodes = local_system_inodes; 92 osb->local_system_inodes = local_system_inodes;
93 spin_unlock(&osb->osb_lock); 93 spin_unlock(&osb->osb_lock);
94 if (unlikely(free)) 94 kfree(free);
95 kfree(free);
96 } 95 }
97 96
98 index = (slot * NUM_LOCAL_SYSTEM_INODES) + 97 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 0ba9ea1e7961..2e3ea308c144 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7189 struct buffer_head *dir_bh = NULL; 7189 struct buffer_head *dir_bh = NULL;
7190 7190
7191 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7191 ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7192 if (!ret) { 7192 if (ret) {
7193 mlog_errno(ret); 7193 mlog_errno(ret);
7194 goto leave; 7194 goto leave;
7195 } 7195 }
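
The xattr.c hunk is a straight bug fix: ocfs2_init_security_get() follows the usual 0-on-success / negative-errno convention, so the old "if (!ret)" jumped to the error path on success and silently skipped it on failure. The sketch below shows the intended shape, with a hypothetical callee name:

#include <linux/errno.h>

static int example_init_security(void);  /* hypothetical: returns 0 or -errno */

static int example_init_security_and_acl(void)
{
        int ret = example_init_security();

        if (ret) {              /* non-zero means failure */
                /* log the errno, then bail out */
                goto leave;
        }
        /* ... continue with ACL initialisation ... */
leave:
        return ret;
}
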
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index fb5b3ff79dc6..acbaebcad3a8 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -330,7 +330,7 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
330static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, 330static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
331 u64 fsblock, int hindex) 331 u64 fsblock, int hindex)
332{ 332{
333 struct inode *dir = filp->f_dentry->d_inode; 333 struct inode *dir = file_inode(filp);
334 struct buffer_head *bh; 334 struct buffer_head *bh;
335 struct omfs_inode *oi; 335 struct omfs_inode *oi;
336 u64 self; 336 u64 self;
@@ -405,7 +405,7 @@ out:
405 405
406static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 406static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
407{ 407{
408 struct inode *dir = filp->f_dentry->d_inode; 408 struct inode *dir = file_inode(filp);
409 struct buffer_head *bh; 409 struct buffer_head *bh;
410 loff_t offset, res; 410 loff_t offset, res;
411 unsigned int hchain, hindex; 411 unsigned int hchain, hindex;
diff --git a/fs/open.c b/fs/open.c
index 9b33c0cbfacf..62f907e3bc36 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -228,7 +228,7 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
228 228
229int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 229int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
230{ 230{
231 struct inode *inode = file->f_path.dentry->d_inode; 231 struct inode *inode = file_inode(file);
232 long ret; 232 long ret;
233 233
234 if (offset < 0 || len <= 0) 234 if (offset < 0 || len <= 0)
@@ -426,7 +426,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
426 if (!f.file) 426 if (!f.file)
427 goto out; 427 goto out;
428 428
429 inode = f.file->f_path.dentry->d_inode; 429 inode = file_inode(f.file);
430 430
431 error = -ENOTDIR; 431 error = -ENOTDIR;
432 if (!S_ISDIR(inode->i_mode)) 432 if (!S_ISDIR(inode->i_mode))
@@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f,
689 f->f_mode = FMODE_PATH; 689 f->f_mode = FMODE_PATH;
690 690
691 path_get(&f->f_path); 691 path_get(&f->f_path);
692 inode = f->f_path.dentry->d_inode; 692 inode = file_inode(f);
693 if (f->f_mode & FMODE_WRITE) { 693 if (f->f_mode & FMODE_WRITE) {
694 error = __get_file_write_access(inode, f->f_path.mnt); 694 error = __get_file_write_access(inode, f->f_path.mnt);
695 if (error) 695 if (error)
@@ -699,7 +699,6 @@ static int do_dentry_open(struct file *f,
699 } 699 }
700 700
701 f->f_mapping = inode->i_mapping; 701 f->f_mapping = inode->i_mapping;
702 f->f_pos = 0;
703 file_sb_list_add(f, inode->i_sb); 702 file_sb_list_add(f, inode->i_sb);
704 703
705 if (unlikely(f->f_mode & FMODE_PATH)) { 704 if (unlikely(f->f_mode & FMODE_PATH)) {
@@ -810,23 +809,22 @@ struct file *dentry_open(const struct path *path, int flags,
810 /* We must always pass in a valid mount pointer. */ 809 /* We must always pass in a valid mount pointer. */
811 BUG_ON(!path->mnt); 810 BUG_ON(!path->mnt);
812 811
813 error = -ENFILE;
814 f = get_empty_filp(); 812 f = get_empty_filp();
815 if (f == NULL) 813 if (!IS_ERR(f)) {
816 return ERR_PTR(error); 814 f->f_flags = flags;
817 815 f->f_path = *path;
818 f->f_flags = flags; 816 error = do_dentry_open(f, NULL, cred);
819 f->f_path = *path; 817 if (!error) {
820 error = do_dentry_open(f, NULL, cred); 818 /* from now on we need fput() to dispose of f */
821 if (!error) { 819 error = open_check_o_direct(f);
822 error = open_check_o_direct(f); 820 if (error) {
823 if (error) { 821 fput(f);
824 fput(f); 822 f = ERR_PTR(error);
823 }
824 } else {
825 put_filp(f);
825 f = ERR_PTR(error); 826 f = ERR_PTR(error);
826 } 827 }
827 } else {
828 put_filp(f);
829 f = ERR_PTR(error);
830 } 828 }
831 return f; 829 return f;
832} 830}
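
The dentry_open() rewrite follows from get_empty_filp() now returning ERR_PTR(-errno) instead of NULL, so the hard-coded "error = -ENFILE" disappears and the result is tested with IS_ERR(). A compressed sketch of that calling convention as it would look inside fs/open.c (control flow simplified, cleanup helpers kept):

#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/err.h>

static struct file *example_open(const struct path *path, int flags,
                                 const struct cred *cred)
{
        struct file *f = get_empty_filp();
        int error;

        if (IS_ERR(f))                  /* allocator already encodes the errno */
                return f;

        f->f_flags = flags;
        f->f_path = *path;
        error = do_dentry_open(f, NULL, cred);
        if (error) {
                put_filp(f);            /* not yet a fully opened file */
                return ERR_PTR(error);
        }
        error = open_check_o_direct(f);
        if (error) {
                fput(f);                /* from here on fput() is the right undo */
                return ERR_PTR(error);
        }
        return f;
}
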
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2ad080faca34..ae47fa7efb9d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -262,7 +262,7 @@ found:
262 262
263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
264{ 264{
265 struct inode *inode = filp->f_path.dentry->d_inode; 265 struct inode *inode = file_inode(filp);
266 struct op_inode_info *oi = OP_I(inode); 266 struct op_inode_info *oi = OP_I(inode);
267 struct device_node *dp = oi->u.node; 267 struct device_node *dp = oi->u.node;
268 struct device_node *child; 268 struct device_node *child;
diff --git a/fs/pipe.c b/fs/pipe.c
index bd3479db4b62..64a494cef0a0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -361,7 +361,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
361 unsigned long nr_segs, loff_t pos) 361 unsigned long nr_segs, loff_t pos)
362{ 362{
363 struct file *filp = iocb->ki_filp; 363 struct file *filp = iocb->ki_filp;
364 struct inode *inode = filp->f_path.dentry->d_inode; 364 struct inode *inode = file_inode(filp);
365 struct pipe_inode_info *pipe; 365 struct pipe_inode_info *pipe;
366 int do_wakeup; 366 int do_wakeup;
367 ssize_t ret; 367 ssize_t ret;
@@ -486,7 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
486 unsigned long nr_segs, loff_t ppos) 486 unsigned long nr_segs, loff_t ppos)
487{ 487{
488 struct file *filp = iocb->ki_filp; 488 struct file *filp = iocb->ki_filp;
489 struct inode *inode = filp->f_path.dentry->d_inode; 489 struct inode *inode = file_inode(filp);
490 struct pipe_inode_info *pipe; 490 struct pipe_inode_info *pipe;
491 ssize_t ret; 491 ssize_t ret;
492 int do_wakeup; 492 int do_wakeup;
@@ -677,7 +677,7 @@ bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
677 677
678static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 678static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
679{ 679{
680 struct inode *inode = filp->f_path.dentry->d_inode; 680 struct inode *inode = file_inode(filp);
681 struct pipe_inode_info *pipe; 681 struct pipe_inode_info *pipe;
682 int count, buf, nrbufs; 682 int count, buf, nrbufs;
683 683
@@ -705,7 +705,7 @@ static unsigned int
705pipe_poll(struct file *filp, poll_table *wait) 705pipe_poll(struct file *filp, poll_table *wait)
706{ 706{
707 unsigned int mask; 707 unsigned int mask;
708 struct inode *inode = filp->f_path.dentry->d_inode; 708 struct inode *inode = file_inode(filp);
709 struct pipe_inode_info *pipe = inode->i_pipe; 709 struct pipe_inode_info *pipe = inode->i_pipe;
710 int nrbufs; 710 int nrbufs;
711 711
@@ -758,7 +758,7 @@ pipe_release(struct inode *inode, int decr, int decw)
758static int 758static int
759pipe_read_fasync(int fd, struct file *filp, int on) 759pipe_read_fasync(int fd, struct file *filp, int on)
760{ 760{
761 struct inode *inode = filp->f_path.dentry->d_inode; 761 struct inode *inode = file_inode(filp);
762 int retval; 762 int retval;
763 763
764 mutex_lock(&inode->i_mutex); 764 mutex_lock(&inode->i_mutex);
@@ -772,7 +772,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
772static int 772static int
773pipe_write_fasync(int fd, struct file *filp, int on) 773pipe_write_fasync(int fd, struct file *filp, int on)
774{ 774{
775 struct inode *inode = filp->f_path.dentry->d_inode; 775 struct inode *inode = file_inode(filp);
776 int retval; 776 int retval;
777 777
778 mutex_lock(&inode->i_mutex); 778 mutex_lock(&inode->i_mutex);
@@ -786,7 +786,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
786static int 786static int
787pipe_rdwr_fasync(int fd, struct file *filp, int on) 787pipe_rdwr_fasync(int fd, struct file *filp, int on)
788{ 788{
789 struct inode *inode = filp->f_path.dentry->d_inode; 789 struct inode *inode = file_inode(filp);
790 struct pipe_inode_info *pipe = inode->i_pipe; 790 struct pipe_inode_info *pipe = inode->i_pipe;
791 int retval; 791 int retval;
792 792
@@ -1037,13 +1037,13 @@ int create_pipe_files(struct file **res, int flags)
1037 1037
1038 err = -ENFILE; 1038 err = -ENFILE;
1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1040 if (!f) 1040 if (IS_ERR(f))
1041 goto err_dentry; 1041 goto err_dentry;
1042 1042
1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); 1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
1044 1044
1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); 1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
1046 if (!res[0]) 1046 if (IS_ERR(res[0]))
1047 goto err_file; 1047 goto err_file;
1048 1048
1049 path_get(&path); 1049 path_get(&path);
@@ -1226,7 +1226,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1226 */ 1226 */
1227struct pipe_inode_info *get_pipe_info(struct file *file) 1227struct pipe_inode_info *get_pipe_info(struct file *file)
1228{ 1228{
1229 struct inode *i = file->f_path.dentry->d_inode; 1229 struct inode *i = file_inode(file);
1230 1230
1231 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; 1231 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
1232} 1232}
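
In pipe.c the same NULL-to-ERR_PTR transition reaches alloc_file(): create_pipe_files() must now test the result with IS_ERR() rather than "!f". A tiny sketch of the caller-side rule, using a hypothetical constructor:

#include <linux/err.h>

struct example_obj;
struct example_obj *example_alloc(void);  /* hypothetical: returns ERR_PTR on failure */

static int example_caller(struct example_obj **out)
{
        struct example_obj *o = example_alloc();

        if (IS_ERR(o))                  /* "!o" would miss the error entirely */
                return PTR_ERR(o);      /* recover the encoded -errno */
        *out = o;
        return 0;
}
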
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o fd.o 11 fd.o
12proc-$(CONFIG_TTY) += proc_tty.o
12proc-y += cmdline.o 13proc-y += cmdline.o
13proc-y += consoles.o 14proc-y += consoles.o
14proc-y += cpuinfo.o 15proc-y += cpuinfo.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b43ff77a51e..69078c7cef1f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -73,6 +73,7 @@
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/ptrace.h> 74#include <linux/ptrace.h>
75#include <linux/tracehook.h> 75#include <linux/tracehook.h>
76#include <linux/printk.h>
76#include <linux/cgroup.h> 77#include <linux/cgroup.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/audit.h> 79#include <linux/audit.h>
@@ -383,7 +384,7 @@ static int lstats_open(struct inode *inode, struct file *file)
383static ssize_t lstats_write(struct file *file, const char __user *buf, 384static ssize_t lstats_write(struct file *file, const char __user *buf,
384 size_t count, loff_t *offs) 385 size_t count, loff_t *offs)
385{ 386{
386 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 387 struct task_struct *task = get_proc_task(file_inode(file));
387 388
388 if (!task) 389 if (!task)
389 return -ESRCH; 390 return -ESRCH;
@@ -602,7 +603,7 @@ static const struct inode_operations proc_def_inode_operations = {
602static ssize_t proc_info_read(struct file * file, char __user * buf, 603static ssize_t proc_info_read(struct file * file, char __user * buf,
603 size_t count, loff_t *ppos) 604 size_t count, loff_t *ppos)
604{ 605{
605 struct inode * inode = file->f_path.dentry->d_inode; 606 struct inode * inode = file_inode(file);
606 unsigned long page; 607 unsigned long page;
607 ssize_t length; 608 ssize_t length;
608 struct task_struct *task = get_proc_task(inode); 609 struct task_struct *task = get_proc_task(inode);
@@ -668,7 +669,7 @@ static const struct file_operations proc_single_file_operations = {
668 669
669static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 670static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
670{ 671{
671 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 672 struct task_struct *task = get_proc_task(file_inode(file));
672 struct mm_struct *mm; 673 struct mm_struct *mm;
673 674
674 if (!task) 675 if (!task)
@@ -869,7 +870,7 @@ static const struct file_operations proc_environ_operations = {
869static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 870static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
870 loff_t *ppos) 871 loff_t *ppos)
871{ 872{
872 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 873 struct task_struct *task = get_proc_task(file_inode(file));
873 char buffer[PROC_NUMBUF]; 874 char buffer[PROC_NUMBUF];
874 int oom_adj = OOM_ADJUST_MIN; 875 int oom_adj = OOM_ADJUST_MIN;
875 size_t len; 876 size_t len;
@@ -916,7 +917,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
916 goto out; 917 goto out;
917 } 918 }
918 919
919 task = get_proc_task(file->f_path.dentry->d_inode); 920 task = get_proc_task(file_inode(file));
920 if (!task) { 921 if (!task) {
921 err = -ESRCH; 922 err = -ESRCH;
922 goto out; 923 goto out;
@@ -952,7 +953,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
952 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 953 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
953 * /proc/pid/oom_score_adj instead. 954 * /proc/pid/oom_score_adj instead.
954 */ 955 */
955 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 956 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
956 current->comm, task_pid_nr(current), task_pid_nr(task), 957 current->comm, task_pid_nr(current), task_pid_nr(task),
957 task_pid_nr(task)); 958 task_pid_nr(task));
958 959
@@ -976,7 +977,7 @@ static const struct file_operations proc_oom_adj_operations = {
976static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 977static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
977 size_t count, loff_t *ppos) 978 size_t count, loff_t *ppos)
978{ 979{
979 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 980 struct task_struct *task = get_proc_task(file_inode(file));
980 char buffer[PROC_NUMBUF]; 981 char buffer[PROC_NUMBUF];
981 short oom_score_adj = OOM_SCORE_ADJ_MIN; 982 short oom_score_adj = OOM_SCORE_ADJ_MIN;
982 unsigned long flags; 983 unsigned long flags;
@@ -1019,7 +1020,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1019 goto out; 1020 goto out;
1020 } 1021 }
1021 1022
1022 task = get_proc_task(file->f_path.dentry->d_inode); 1023 task = get_proc_task(file_inode(file));
1023 if (!task) { 1024 if (!task) {
1024 err = -ESRCH; 1025 err = -ESRCH;
1025 goto out; 1026 goto out;
@@ -1067,7 +1068,7 @@ static const struct file_operations proc_oom_score_adj_operations = {
1067static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1068static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1068 size_t count, loff_t *ppos) 1069 size_t count, loff_t *ppos)
1069{ 1070{
1070 struct inode * inode = file->f_path.dentry->d_inode; 1071 struct inode * inode = file_inode(file);
1071 struct task_struct *task = get_proc_task(inode); 1072 struct task_struct *task = get_proc_task(inode);
1072 ssize_t length; 1073 ssize_t length;
1073 char tmpbuf[TMPBUFLEN]; 1074 char tmpbuf[TMPBUFLEN];
@@ -1084,7 +1085,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1084static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1085static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1085 size_t count, loff_t *ppos) 1086 size_t count, loff_t *ppos)
1086{ 1087{
1087 struct inode * inode = file->f_path.dentry->d_inode; 1088 struct inode * inode = file_inode(file);
1088 char *page, *tmp; 1089 char *page, *tmp;
1089 ssize_t length; 1090 ssize_t length;
1090 uid_t loginuid; 1091 uid_t loginuid;
@@ -1142,7 +1143,7 @@ static const struct file_operations proc_loginuid_operations = {
1142static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1143static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1143 size_t count, loff_t *ppos) 1144 size_t count, loff_t *ppos)
1144{ 1145{
1145 struct inode * inode = file->f_path.dentry->d_inode; 1146 struct inode * inode = file_inode(file);
1146 struct task_struct *task = get_proc_task(inode); 1147 struct task_struct *task = get_proc_task(inode);
1147 ssize_t length; 1148 ssize_t length;
1148 char tmpbuf[TMPBUFLEN]; 1149 char tmpbuf[TMPBUFLEN];
@@ -1165,7 +1166,7 @@ static const struct file_operations proc_sessionid_operations = {
1165static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1166static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1166 size_t count, loff_t *ppos) 1167 size_t count, loff_t *ppos)
1167{ 1168{
1168 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 1169 struct task_struct *task = get_proc_task(file_inode(file));
1169 char buffer[PROC_NUMBUF]; 1170 char buffer[PROC_NUMBUF];
1170 size_t len; 1171 size_t len;
1171 int make_it_fail; 1172 int make_it_fail;
@@ -1197,7 +1198,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
1197 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1198 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1198 if (*end) 1199 if (*end)
1199 return -EINVAL; 1200 return -EINVAL;
1200 task = get_proc_task(file->f_dentry->d_inode); 1201 task = get_proc_task(file_inode(file));
1201 if (!task) 1202 if (!task)
1202 return -ESRCH; 1203 return -ESRCH;
1203 task->make_it_fail = make_it_fail; 1204 task->make_it_fail = make_it_fail;
@@ -1237,7 +1238,7 @@ static ssize_t
1237sched_write(struct file *file, const char __user *buf, 1238sched_write(struct file *file, const char __user *buf,
1238 size_t count, loff_t *offset) 1239 size_t count, loff_t *offset)
1239{ 1240{
1240 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file_inode(file);
1241 struct task_struct *p; 1242 struct task_struct *p;
1242 1243
1243 p = get_proc_task(inode); 1244 p = get_proc_task(inode);
@@ -1288,7 +1289,7 @@ static ssize_t
1288sched_autogroup_write(struct file *file, const char __user *buf, 1289sched_autogroup_write(struct file *file, const char __user *buf,
1289 size_t count, loff_t *offset) 1290 size_t count, loff_t *offset)
1290{ 1291{
1291 struct inode *inode = file->f_path.dentry->d_inode; 1292 struct inode *inode = file_inode(file);
1292 struct task_struct *p; 1293 struct task_struct *p;
1293 char buffer[PROC_NUMBUF]; 1294 char buffer[PROC_NUMBUF];
1294 int nice; 1295 int nice;
@@ -1343,7 +1344,7 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
1343static ssize_t comm_write(struct file *file, const char __user *buf, 1344static ssize_t comm_write(struct file *file, const char __user *buf,
1344 size_t count, loff_t *offset) 1345 size_t count, loff_t *offset)
1345{ 1346{
1346 struct inode *inode = file->f_path.dentry->d_inode; 1347 struct inode *inode = file_inode(file);
1347 struct task_struct *p; 1348 struct task_struct *p;
1348 char buffer[TASK_COMM_LEN]; 1349 char buffer[TASK_COMM_LEN];
1349 1350
@@ -1711,7 +1712,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
1711 return -ECHILD; 1712 return -ECHILD;
1712 1713
1713 if (!capable(CAP_SYS_ADMIN)) { 1714 if (!capable(CAP_SYS_ADMIN)) {
1714 status = -EACCES; 1715 status = -EPERM;
1715 goto out_notask; 1716 goto out_notask;
1716 } 1717 }
1717 1718
@@ -1844,7 +1845,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
1844 struct dentry *result; 1845 struct dentry *result;
1845 struct mm_struct *mm; 1846 struct mm_struct *mm;
1846 1847
1847 result = ERR_PTR(-EACCES); 1848 result = ERR_PTR(-EPERM);
1848 if (!capable(CAP_SYS_ADMIN)) 1849 if (!capable(CAP_SYS_ADMIN))
1849 goto out; 1850 goto out;
1850 1851
@@ -1900,7 +1901,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
1900 ino_t ino; 1901 ino_t ino;
1901 int ret; 1902 int ret;
1902 1903
1903 ret = -EACCES; 1904 ret = -EPERM;
1904 if (!capable(CAP_SYS_ADMIN)) 1905 if (!capable(CAP_SYS_ADMIN))
1905 goto out; 1906 goto out;
1906 1907
@@ -2146,7 +2147,7 @@ out_no_task:
2146static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2147static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2147 size_t count, loff_t *ppos) 2148 size_t count, loff_t *ppos)
2148{ 2149{
2149 struct inode * inode = file->f_path.dentry->d_inode; 2150 struct inode * inode = file_inode(file);
2150 char *p = NULL; 2151 char *p = NULL;
2151 ssize_t length; 2152 ssize_t length;
2152 struct task_struct *task = get_proc_task(inode); 2153 struct task_struct *task = get_proc_task(inode);
@@ -2167,7 +2168,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2167static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2168static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2168 size_t count, loff_t *ppos) 2169 size_t count, loff_t *ppos)
2169{ 2170{
2170 struct inode * inode = file->f_path.dentry->d_inode; 2171 struct inode * inode = file_inode(file);
2171 char *page; 2172 char *page;
2172 ssize_t length; 2173 ssize_t length;
2173 struct task_struct *task = get_proc_task(inode); 2174 struct task_struct *task = get_proc_task(inode);
@@ -2256,7 +2257,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
2256static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2257static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2257 size_t count, loff_t *ppos) 2258 size_t count, loff_t *ppos)
2258{ 2259{
2259 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2260 struct task_struct *task = get_proc_task(file_inode(file));
2260 struct mm_struct *mm; 2261 struct mm_struct *mm;
2261 char buffer[PROC_NUMBUF]; 2262 char buffer[PROC_NUMBUF];
2262 size_t len; 2263 size_t len;
@@ -2308,7 +2309,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
2308 goto out_no_task; 2309 goto out_no_task;
2309 2310
2310 ret = -ESRCH; 2311 ret = -ESRCH;
2311 task = get_proc_task(file->f_dentry->d_inode); 2312 task = get_proc_task(file_inode(file));
2312 if (!task) 2313 if (!task)
2313 goto out_no_task; 2314 goto out_no_task;
2314 2315
@@ -2618,6 +2619,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2618 2619
2619 name.name = buf; 2620 name.name = buf;
2620 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2621 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2622 /* no ->d_hash() rejects on procfs */
2621 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2623 dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2622 if (dentry) { 2624 if (dentry) {
2623 shrink_dcache_parent(dentry); 2625 shrink_dcache_parent(dentry);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 76ddae83daa5..4b3b3ffb52f1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/printk.h>
18#include <linux/mount.h> 19#include <linux/mount.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/idr.h> 21#include <linux/idr.h>
@@ -42,7 +43,7 @@ static ssize_t
42__proc_file_read(struct file *file, char __user *buf, size_t nbytes, 43__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
43 loff_t *ppos) 44 loff_t *ppos)
44{ 45{
45 struct inode * inode = file->f_path.dentry->d_inode; 46 struct inode * inode = file_inode(file);
46 char *page; 47 char *page;
47 ssize_t retval=0; 48 ssize_t retval=0;
48 int eof=0; 49 int eof=0;
@@ -132,11 +133,8 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
132 } 133 }
133 134
134 if (start == NULL) { 135 if (start == NULL) {
135 if (n > PAGE_SIZE) { 136 if (n > PAGE_SIZE) /* Apparent buffer overflow */
136 printk(KERN_ERR
137 "proc_file_read: Apparent buffer overflow!\n");
138 n = PAGE_SIZE; 137 n = PAGE_SIZE;
139 }
140 n -= *ppos; 138 n -= *ppos;
141 if (n <= 0) 139 if (n <= 0)
142 break; 140 break;
@@ -144,26 +142,19 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
144 n = count; 142 n = count;
145 start = page + *ppos; 143 start = page + *ppos;
146 } else if (start < page) { 144 } else if (start < page) {
147 if (n > PAGE_SIZE) { 145 if (n > PAGE_SIZE) /* Apparent buffer overflow */
148 printk(KERN_ERR
149 "proc_file_read: Apparent buffer overflow!\n");
150 n = PAGE_SIZE; 146 n = PAGE_SIZE;
151 }
152 if (n > count) { 147 if (n > count) {
153 /* 148 /*
154 * Don't reduce n because doing so might 149 * Don't reduce n because doing so might
155 * cut off part of a data block. 150 * cut off part of a data block.
156 */ 151 */
157 printk(KERN_WARNING 152 pr_warn("proc_file_read: count exceeded\n");
158 "proc_file_read: Read count exceeded\n");
159 } 153 }
160 } else /* start >= page */ { 154 } else /* start >= page */ {
161 unsigned long startoff = (unsigned long)(start - page); 155 unsigned long startoff = (unsigned long)(start - page);
162 if (n > (PAGE_SIZE - startoff)) { 156 if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */
163 printk(KERN_ERR
164 "proc_file_read: Apparent buffer overflow!\n");
165 n = PAGE_SIZE - startoff; 157 n = PAGE_SIZE - startoff;
166 }
167 if (n > count) 158 if (n > count)
168 n = count; 159 n = count;
169 } 160 }
@@ -188,7 +179,7 @@ static ssize_t
188proc_file_read(struct file *file, char __user *buf, size_t nbytes, 179proc_file_read(struct file *file, char __user *buf, size_t nbytes,
189 loff_t *ppos) 180 loff_t *ppos)
190{ 181{
191 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 182 struct proc_dir_entry *pde = PDE(file_inode(file));
192 ssize_t rv = -EIO; 183 ssize_t rv = -EIO;
193 184
194 spin_lock(&pde->pde_unload_lock); 185 spin_lock(&pde->pde_unload_lock);
@@ -209,7 +200,7 @@ static ssize_t
209proc_file_write(struct file *file, const char __user *buffer, 200proc_file_write(struct file *file, const char __user *buffer,
210 size_t count, loff_t *ppos) 201 size_t count, loff_t *ppos)
211{ 202{
212 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 203 struct proc_dir_entry *pde = PDE(file_inode(file));
213 ssize_t rv = -EIO; 204 ssize_t rv = -EIO;
214 205
215 if (pde->write_proc) { 206 if (pde->write_proc) {
@@ -412,8 +403,7 @@ static const struct dentry_operations proc_dentry_operations =
412struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, 403struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
413 struct dentry *dentry) 404 struct dentry *dentry)
414{ 405{
415 struct inode *inode = NULL; 406 struct inode *inode;
416 int error = -ENOENT;
417 407
418 spin_lock(&proc_subdir_lock); 408 spin_lock(&proc_subdir_lock);
419 for (de = de->subdir; de ; de = de->next) { 409 for (de = de->subdir; de ; de = de->next) {
@@ -422,22 +412,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
422 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 412 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
423 pde_get(de); 413 pde_get(de);
424 spin_unlock(&proc_subdir_lock); 414 spin_unlock(&proc_subdir_lock);
425 error = -ENOMEM;
426 inode = proc_get_inode(dir->i_sb, de); 415 inode = proc_get_inode(dir->i_sb, de);
427 goto out_unlock; 416 if (!inode)
417 return ERR_PTR(-ENOMEM);
418 d_set_d_op(dentry, &proc_dentry_operations);
419 d_add(dentry, inode);
420 return NULL;
428 } 421 }
429 } 422 }
430 spin_unlock(&proc_subdir_lock); 423 spin_unlock(&proc_subdir_lock);
431out_unlock: 424 return ERR_PTR(-ENOENT);
432
433 if (inode) {
434 d_set_d_op(dentry, &proc_dentry_operations);
435 d_add(dentry, inode);
436 return NULL;
437 }
438 if (de)
439 pde_put(de);
440 return ERR_PTR(error);
441} 425}
442 426
443struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 427struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
@@ -460,7 +444,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
460{ 444{
461 unsigned int ino; 445 unsigned int ino;
462 int i; 446 int i;
463 struct inode *inode = filp->f_path.dentry->d_inode; 447 struct inode *inode = file_inode(filp);
464 int ret = 0; 448 int ret = 0;
465 449
466 ino = inode->i_ino; 450 ino = inode->i_ino;
@@ -522,7 +506,7 @@ out:
522 506
523int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) 507int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
524{ 508{
525 struct inode *inode = filp->f_path.dentry->d_inode; 509 struct inode *inode = file_inode(filp);
526 510
527 return proc_readdir_de(PDE(inode), filp, dirent, filldir); 511 return proc_readdir_de(PDE(inode), filp, dirent, filldir);
528} 512}
@@ -576,7 +560,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
576 560
577 for (tmp = dir->subdir; tmp; tmp = tmp->next) 561 for (tmp = dir->subdir; tmp; tmp = tmp->next)
578 if (strcmp(tmp->name, dp->name) == 0) { 562 if (strcmp(tmp->name, dp->name) == 0) {
579 WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", 563 WARN(1, "proc_dir_entry '%s/%s' already registered\n",
580 dir->name, dp->name); 564 dir->name, dp->name);
581 break; 565 break;
582 } 566 }
@@ -837,9 +821,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
837 if (S_ISDIR(de->mode)) 821 if (S_ISDIR(de->mode))
838 parent->nlink--; 822 parent->nlink--;
839 de->nlink = 0; 823 de->nlink = 0;
840 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " 824 WARN(de->subdir, "%s: removing non-empty directory "
841 "'%s/%s', leaking at least '%s'\n", __func__, 825 "'%s/%s', leaking at least '%s'\n", __func__,
842 de->parent->name, de->name, de->subdir->name); 826 de->parent->name, de->name, de->subdir->name);
843 pde_put(de); 827 pde_put(de);
844} 828}
845EXPORT_SYMBOL(remove_proc_entry); 829EXPORT_SYMBOL(remove_proc_entry);
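
The proc_lookup_de() rewrite drops the shared out_unlock tail in favour of returning straight from the match: lookup-style helpers return NULL once the dentry has been instantiated with d_add(), ERR_PTR(-ENOMEM) if the inode could not be set up, and ERR_PTR(-ENOENT) when nothing matched. A stripped-down sketch of that shape, with the proc-specific locking elided and hypothetical table helpers:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/err.h>

static bool example_name_exists(struct inode *dir, const struct qstr *name);   /* hypothetical */
static struct inode *example_make_inode(struct super_block *sb);               /* hypothetical */

static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry)
{
        struct inode *inode;

        if (!example_name_exists(dir, &dentry->d_name))
                return ERR_PTR(-ENOENT);

        inode = example_make_inode(dir->i_sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);

        d_add(dentry, inode);   /* success: dentry is now instantiated */
        return NULL;
}
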
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 439ae6886507..a86aebc9ba7c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -13,6 +13,7 @@
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/completion.h> 14#include <linux/completion.h>
15#include <linux/poll.h> 15#include <linux/poll.h>
16#include <linux/printk.h>
16#include <linux/file.h> 17#include <linux/file.h>
17#include <linux/limits.h> 18#include <linux/limits.h>
18#include <linux/init.h> 19#include <linux/init.h>
@@ -144,7 +145,7 @@ void pde_users_dec(struct proc_dir_entry *pde)
144 145
145static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) 146static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
146{ 147{
147 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 148 struct proc_dir_entry *pde = PDE(file_inode(file));
148 loff_t rv = -EINVAL; 149 loff_t rv = -EINVAL;
149 loff_t (*llseek)(struct file *, loff_t, int); 150 loff_t (*llseek)(struct file *, loff_t, int);
150 151
@@ -179,7 +180,7 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
179 180
180static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 181static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
181{ 182{
182 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 183 struct proc_dir_entry *pde = PDE(file_inode(file));
183 ssize_t rv = -EIO; 184 ssize_t rv = -EIO;
184 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 185 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
185 186
@@ -201,7 +202,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
201 202
202static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 203static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
203{ 204{
204 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 205 struct proc_dir_entry *pde = PDE(file_inode(file));
205 ssize_t rv = -EIO; 206 ssize_t rv = -EIO;
206 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 207 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
207 208
@@ -223,7 +224,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
223 224
224static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) 225static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
225{ 226{
226 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 227 struct proc_dir_entry *pde = PDE(file_inode(file));
227 unsigned int rv = DEFAULT_POLLMASK; 228 unsigned int rv = DEFAULT_POLLMASK;
228 unsigned int (*poll)(struct file *, struct poll_table_struct *); 229 unsigned int (*poll)(struct file *, struct poll_table_struct *);
229 230
@@ -245,7 +246,7 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
245 246
246static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 247static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
247{ 248{
248 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 249 struct proc_dir_entry *pde = PDE(file_inode(file));
249 long rv = -ENOTTY; 250 long rv = -ENOTTY;
250 long (*ioctl)(struct file *, unsigned int, unsigned long); 251 long (*ioctl)(struct file *, unsigned int, unsigned long);
251 252
@@ -268,7 +269,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
268#ifdef CONFIG_COMPAT 269#ifdef CONFIG_COMPAT
269static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 270static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
270{ 271{
271 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 272 struct proc_dir_entry *pde = PDE(file_inode(file));
272 long rv = -ENOTTY; 273 long rv = -ENOTTY;
273 long (*compat_ioctl)(struct file *, unsigned int, unsigned long); 274 long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
274 275
@@ -291,7 +292,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
291 292
292static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) 293static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
293{ 294{
294 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 295 struct proc_dir_entry *pde = PDE(file_inode(file));
295 int rv = -EIO; 296 int rv = -EIO;
296 int (*mmap)(struct file *, struct vm_area_struct *); 297 int (*mmap)(struct file *, struct vm_area_struct *);
297 298
@@ -445,12 +446,9 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
445 446
446struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) 447struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
447{ 448{
448 struct inode * inode; 449 struct inode *inode = iget_locked(sb, de->low_ino);
449 450
450 inode = iget_locked(sb, de->low_ino); 451 if (inode && (inode->i_state & I_NEW)) {
451 if (!inode)
452 return NULL;
453 if (inode->i_state & I_NEW) {
454 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
455 PROC_I(inode)->pde = de; 453 PROC_I(inode)->pde = de;
456 454
@@ -482,10 +480,12 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
482 } else 480 } else
483 pde_put(de); 481 pde_put(de);
484 return inode; 482 return inode;
485} 483}
486 484
487int proc_fill_super(struct super_block *s) 485int proc_fill_super(struct super_block *s)
488{ 486{
487 struct inode *root_inode;
488
489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
490 s->s_blocksize = 1024; 490 s->s_blocksize = 1024;
491 s->s_blocksize_bits = 10; 491 s->s_blocksize_bits = 10;
@@ -494,11 +494,17 @@ int proc_fill_super(struct super_block *s)
494 s->s_time_gran = 1; 494 s->s_time_gran = 1;
495 495
496 pde_get(&proc_root); 496 pde_get(&proc_root);
497 s->s_root = d_make_root(proc_get_inode(s, &proc_root)); 497 root_inode = proc_get_inode(s, &proc_root);
498 if (s->s_root) 498 if (!root_inode) {
499 return 0; 499 pr_err("proc_fill_super: get root inode failed\n");
500 return -ENOMEM;
501 }
500 502
501 printk("proc_read_super: get root inode failed\n"); 503 s->s_root = d_make_root(root_inode);
502 pde_put(&proc_root); 504 if (!s->s_root) {
503 return -ENOMEM; 505 pr_err("proc_fill_super: allocate dentry failed\n");
506 return -ENOMEM;
507 }
508
509 return 0;
504} 510}
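
proc_get_inode() now folds the NULL check into the I_NEW test, and proc_fill_super() reports its two failure modes (no root inode vs. no root dentry) separately. Behind the first change is the iget_locked() contract: it returns either a cached inode or a newly allocated one with I_NEW set, which the caller fills in and then releases with unlock_new_inode(). A rough sketch, with a hypothetical fill step:

#include <linux/fs.h>

static void example_fill(struct inode *inode);  /* hypothetical fs-specific setup */

static struct inode *example_iget(struct super_block *sb, unsigned long ino)
{
        struct inode *inode = iget_locked(sb, ino);

        if (inode && (inode->i_state & I_NEW)) {
                example_fill(inode);            /* first time this ino is seen */
                unlock_new_inode(inode);
        }
        return inode;   /* NULL here means -ENOMEM for the caller */
}
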
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 252544c05207..85ff3a4598b3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/binfmts.h>
14struct ctl_table_header; 15struct ctl_table_header;
15struct mempolicy; 16struct mempolicy;
16 17
@@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task)
108 if (mm) 109 if (mm)
109 dumpable = get_dumpable(mm); 110 dumpable = get_dumpable(mm);
110 task_unlock(task); 111 task_unlock(task);
111 if (dumpable == SUID_DUMPABLE_ENABLED) 112 if (dumpable == SUID_DUMP_USER)
112 return 1; 113 return 1;
113 return 0; 114 return 0;
114} 115}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e96d4f18ca3a..eda6f017f272 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,6 +17,7 @@
17#include <linux/elfcore.h> 17#include <linux/elfcore.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/highmem.h> 19#include <linux/highmem.h>
20#include <linux/printk.h>
20#include <linux/bootmem.h> 21#include <linux/bootmem.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
@@ -619,7 +620,7 @@ static int __init proc_kcore_init(void)
619 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, 620 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
620 &proc_kcore_operations); 621 &proc_kcore_operations);
621 if (!proc_root_kcore) { 622 if (!proc_root_kcore) {
622 printk(KERN_ERR "couldn't create /proc/kcore\n"); 623 pr_err("couldn't create /proc/kcore\n");
623 return 0; /* Always returns 0. */ 624 return 0; /* Always returns 0. */
624 } 625 }
625 /* Store text area if it's special */ 626 /* Store text area if it's special */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
42 cached = global_page_state(NR_FILE_PAGES) - 42 cached = global_page_state(NR_FILE_PAGES) -
43 total_swapcache_pages - i.bufferram; 43 total_swapcache_pages() - i.bufferram;
44 if (cached < 0) 44 if (cached < 0)
45 cached = 0; 45 cached = 0;
46 46
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
109 K(i.freeram), 109 K(i.freeram),
110 K(i.bufferram), 110 K(i.bufferram),
111 K(cached), 111 K(cached),
112 K(total_swapcache_pages), 112 K(total_swapcache_pages()),
113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), 113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), 114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
115 K(pages[LRU_ACTIVE_ANON]), 115 K(pages[LRU_ACTIVE_ANON]),
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
158 vmi.used >> 10, 158 vmi.used >> 10,
159 vmi.largest_chunk >> 10 159 vmi.largest_chunk >> 10
160#ifdef CONFIG_MEMORY_FAILURE 160#ifdef CONFIG_MEMORY_FAILURE
161 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) 161 ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
162#endif 162#endif
163#ifdef CONFIG_TRANSPARENT_HUGEPAGE 163#ifdef CONFIG_TRANSPARENT_HUGEPAGE
164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index b1822dde55c2..ccfd99bd1c5a 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -45,7 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
45 file = region->vm_file; 45 file = region->vm_file;
46 46
47 if (file) { 47 if (file) {
48 struct inode *inode = region->vm_file->f_path.dentry->d_inode; 48 struct inode *inode = file_inode(region->vm_file);
49 dev = inode->i_sb->s_dev; 49 dev = inode->i_sb->s_dev;
50 ino = inode->i_ino; 50 ino = inode->i_ino;
51 } 51 }
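
The conversions above (and the similar ones in later hunks) replace open-coded file->f_path.dentry->d_inode chains with the file_inode() accessor. A minimal sketch of what that helper amounts to, assuming the struct file carries a cached inode pointer as in the new VFS code:

/* Sketch only -- the real helper lives in <linux/fs.h>. */
static inline struct inode *file_inode(const struct file *f)
{
	return f->f_inode;	/* cached at open time, avoids the dentry hop */
}
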
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de20ec480fa0..30b590f5bd35 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -8,6 +8,7 @@
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/printk.h>
11#include <linux/stat.h> 12#include <linux/stat.h>
12#include <linux/string.h> 13#include <linux/string.h>
13#include <linux/of.h> 14#include <linux/of.h>
@@ -110,8 +111,8 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
110 if (ent->data == oldprop) 111 if (ent->data == oldprop)
111 break; 112 break;
112 if (ent == NULL) { 113 if (ent == NULL) {
113 printk(KERN_WARNING "device-tree: property \"%s\" " 114 pr_warn("device-tree: property \"%s\" does not exist\n",
114 " does not exist\n", oldprop->name); 115 oldprop->name);
115 } else { 116 } else {
116 ent->data = newprop; 117 ent->data = newprop;
117 ent->size = newprop->length; 118 ent->size = newprop->length;
@@ -153,8 +154,8 @@ static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
153realloc: 154realloc:
154 fixed_name = kmalloc(fixup_len, GFP_KERNEL); 155 fixed_name = kmalloc(fixup_len, GFP_KERNEL);
155 if (fixed_name == NULL) { 156 if (fixed_name == NULL) {
156 printk(KERN_ERR "device-tree: Out of memory trying to fixup " 157 pr_err("device-tree: Out of memory trying to fixup "
157 "name \"%s\"\n", name); 158 "name \"%s\"\n", name);
158 return name; 159 return name;
159 } 160 }
160 161
@@ -175,8 +176,8 @@ retry:
175 goto retry; 176 goto retry;
176 } 177 }
177 178
178 printk(KERN_WARNING "device-tree: Duplicate name in %s, " 179 pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
179 "renamed to \"%s\"\n", np->full_name, fixed_name); 180 np->full_name, fixed_name);
180 181
181 return fixed_name; 182 return fixed_name;
182} 183}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..b4ac6572474f 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -163,7 +163,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent,
163 struct net *net; 163 struct net *net;
164 164
165 ret = -EINVAL; 165 ret = -EINVAL;
166 net = get_proc_task_net(filp->f_path.dentry->d_inode); 166 net = get_proc_task_net(file_inode(filp));
167 if (net != NULL) { 167 if (net != NULL) {
168 ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); 168 ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
169 put_net(net); 169 put_net(net);
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
177 .readdir = proc_tgid_net_readdir, 177 .readdir = proc_tgid_net_readdir,
178}; 178};
179 179
180
181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, umode_t mode, const struct file_operations *fops)
183{
184 return proc_create(name, mode, net->proc_net, fops);
185}
186EXPORT_SYMBOL_GPL(proc_net_fops_create);
187
188void proc_net_remove(struct net *net, const char *name)
189{
190 remove_proc_entry(name, net->proc_net);
191}
192EXPORT_SYMBOL_GPL(proc_net_remove);
193
194static __net_init int proc_net_ns_init(struct net *net) 180static __net_init int proc_net_ns_init(struct net *net)
195{ 181{
196 struct proc_dir_entry *netd, *net_statd; 182 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1827d88ad58b..ac05f33a0dde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/poll.h> 6#include <linux/poll.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/printk.h>
8#include <linux/security.h> 9#include <linux/security.h>
9#include <linux/sched.h> 10#include <linux/sched.h>
10#include <linux/namei.h> 11#include <linux/namei.h>
@@ -57,7 +58,7 @@ static void sysctl_print_dir(struct ctl_dir *dir)
57{ 58{
58 if (dir->header.parent) 59 if (dir->header.parent)
59 sysctl_print_dir(dir->header.parent); 60 sysctl_print_dir(dir->header.parent);
60 printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); 61 pr_cont("%s/", dir->header.ctl_table[0].procname);
61} 62}
62 63
63static int namecmp(const char *name1, int len1, const char *name2, int len2) 64static int namecmp(const char *name1, int len1, const char *name2, int len2)
@@ -134,9 +135,9 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
134 else if (cmp > 0) 135 else if (cmp > 0)
135 p = &(*p)->rb_right; 136 p = &(*p)->rb_right;
136 else { 137 else {
137 printk(KERN_ERR "sysctl duplicate entry: "); 138 pr_err("sysctl duplicate entry: ");
138 sysctl_print_dir(head->parent); 139 sysctl_print_dir(head->parent);
139 printk(KERN_CONT "/%s\n", entry->procname); 140 pr_cont("/%s\n", entry->procname);
140 return -EEXIST; 141 return -EEXIST;
141 } 142 }
142 } 143 }
@@ -478,7 +479,7 @@ out:
478static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 479static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
479 size_t count, loff_t *ppos, int write) 480 size_t count, loff_t *ppos, int write)
480{ 481{
481 struct inode *inode = filp->f_path.dentry->d_inode; 482 struct inode *inode = file_inode(filp);
482 struct ctl_table_header *head = grab_header(inode); 483 struct ctl_table_header *head = grab_header(inode);
483 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 484 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
484 ssize_t error; 485 ssize_t error;
@@ -542,7 +543,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
542 543
543static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 544static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
544{ 545{
545 struct inode *inode = filp->f_path.dentry->d_inode; 546 struct inode *inode = file_inode(filp);
546 struct ctl_table_header *head = grab_header(inode); 547 struct ctl_table_header *head = grab_header(inode);
547 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 548 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
548 unsigned int ret = DEFAULT_POLLMASK; 549 unsigned int ret = DEFAULT_POLLMASK;
@@ -927,9 +928,9 @@ found:
927 subdir->header.nreg++; 928 subdir->header.nreg++;
928failed: 929failed:
929 if (unlikely(IS_ERR(subdir))) { 930 if (unlikely(IS_ERR(subdir))) {
930 printk(KERN_ERR "sysctl could not get directory: "); 931 pr_err("sysctl could not get directory: ");
931 sysctl_print_dir(dir); 932 sysctl_print_dir(dir);
932 printk(KERN_CONT "/%*.*s %ld\n", 933 pr_cont("/%*.*s %ld\n",
933 namelen, namelen, name, PTR_ERR(subdir)); 934 namelen, namelen, name, PTR_ERR(subdir));
934 } 935 }
935 drop_sysctl_table(&dir->header); 936 drop_sysctl_table(&dir->header);
@@ -995,8 +996,8 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
995 vaf.fmt = fmt; 996 vaf.fmt = fmt;
996 vaf.va = &args; 997 vaf.va = &args;
997 998
998 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", 999 pr_err("sysctl table check failed: %s/%s %pV\n",
999 path, table->procname, &vaf); 1000 path, table->procname, &vaf);
1000 1001
1001 va_end(args); 1002 va_end(args);
1002 return -EINVAL; 1003 return -EINVAL;
@@ -1510,9 +1511,9 @@ static void put_links(struct ctl_table_header *header)
1510 drop_sysctl_table(link_head); 1511 drop_sysctl_table(link_head);
1511 } 1512 }
1512 else { 1513 else {
1513 printk(KERN_ERR "sysctl link missing during unregister: "); 1514 pr_err("sysctl link missing during unregister: ");
1514 sysctl_print_dir(parent); 1515 sysctl_print_dir(parent);
1515 printk(KERN_CONT "/%s\n", name); 1516 pr_cont("/%s\n", name);
1516 } 1517 }
1517 } 1518 }
1518} 1519}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ca5ce7f9f800..3e636d864d56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -271,7 +271,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
271 const char *name = NULL; 271 const char *name = NULL;
272 272
273 if (file) { 273 if (file) {
274 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 274 struct inode *inode = file_inode(vma->vm_file);
275 dev = inode->i_sb->s_dev; 275 dev = inode->i_sb->s_dev;
276 ino = inode->i_ino; 276 ino = inode->i_ino;
277 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; 277 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -743,7 +743,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
743 return rv; 743 return rv;
744 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 744 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
745 return -EINVAL; 745 return -EINVAL;
746 task = get_proc_task(file->f_path.dentry->d_inode); 746 task = get_proc_task(file_inode(file));
747 if (!task) 747 if (!task)
748 return -ESRCH; 748 return -ESRCH;
749 mm = get_task_mm(task); 749 mm = get_task_mm(task);
@@ -1015,7 +1015,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
1015static ssize_t pagemap_read(struct file *file, char __user *buf, 1015static ssize_t pagemap_read(struct file *file, char __user *buf,
1016 size_t count, loff_t *ppos) 1016 size_t count, loff_t *ppos)
1017{ 1017{
1018 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1018 struct task_struct *task = get_proc_task(file_inode(file));
1019 struct mm_struct *mm; 1019 struct mm_struct *mm;
1020 struct pagemapread pm; 1020 struct pagemapread pm;
1021 int ret = -ESRCH; 1021 int ret = -ESRCH;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1ccfa537f5f5..56123a6f462e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -149,7 +149,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
149 file = vma->vm_file; 149 file = vma->vm_file;
150 150
151 if (file) { 151 if (file) {
152 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 152 struct inode *inode = file_inode(vma->vm_file);
153 dev = inode->i_sb->s_dev; 153 dev = inode->i_sb->s_dev;
154 ino = inode->i_ino; 154 ino = inode->i_ino;
155 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; 155 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0d5071d29985..b870f740ab5a 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -15,6 +15,7 @@
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/printk.h>
18#include <linux/bootmem.h> 19#include <linux/bootmem.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/crash_dump.h> 21#include <linux/crash_dump.h>
@@ -175,15 +176,15 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
175 start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); 176 start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
176 if (!curr_m) 177 if (!curr_m)
177 return -EINVAL; 178 return -EINVAL;
178 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
179 tsz = buflen;
180
181 /* Calculate left bytes in current memory segment. */
182 nr_bytes = (curr_m->size - (start - curr_m->paddr));
183 if (tsz > nr_bytes)
184 tsz = nr_bytes;
185 179
186 while (buflen) { 180 while (buflen) {
181 tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
182
183 /* Calculate left bytes in current memory segment. */
184 nr_bytes = (curr_m->size - (start - curr_m->paddr));
185 if (tsz > nr_bytes)
186 tsz = nr_bytes;
187
187 tmp = read_from_oldmem(buffer, tsz, &start, 1); 188 tmp = read_from_oldmem(buffer, tsz, &start, 1);
188 if (tmp < 0) 189 if (tmp < 0)
189 return tmp; 190 return tmp;
@@ -198,12 +199,6 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
198 struct vmcore, list); 199 struct vmcore, list);
199 start = curr_m->paddr; 200 start = curr_m->paddr;
200 } 201 }
201 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
202 tsz = buflen;
203 /* Calculate left bytes in current memory segment. */
204 nr_bytes = (curr_m->size - (start - curr_m->paddr));
205 if (tsz > nr_bytes)
206 tsz = nr_bytes;
207 } 202 }
208 return acc; 203 return acc;
209} 204}
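
The restructured read loop above computes the per-iteration chunk size once at the top of the loop instead of duplicating the calculation before and after it. The two clamps it applies are, in effect (illustrative rewrite, not the committed code):

	/* chunk = min(bytes left in request, bytes left in page, bytes left in segment) */
	tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
	tsz = min_t(size_t, tsz, curr_m->size - (start - curr_m->paddr));
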
@@ -553,8 +548,7 @@ static int __init parse_crash_elf64_headers(void)
553 ehdr.e_ehsize != sizeof(Elf64_Ehdr) || 548 ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
554 ehdr.e_phentsize != sizeof(Elf64_Phdr) || 549 ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
555 ehdr.e_phnum == 0) { 550 ehdr.e_phnum == 0) {
556 printk(KERN_WARNING "Warning: Core image elf header is not" 551 pr_warn("Warning: Core image elf header is not sane\n");
557 "sane\n");
558 return -EINVAL; 552 return -EINVAL;
559 } 553 }
560 554
@@ -609,8 +603,7 @@ static int __init parse_crash_elf32_headers(void)
609 ehdr.e_ehsize != sizeof(Elf32_Ehdr) || 603 ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
610 ehdr.e_phentsize != sizeof(Elf32_Phdr) || 604 ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
611 ehdr.e_phnum == 0) { 605 ehdr.e_phnum == 0) {
612 printk(KERN_WARNING "Warning: Core image elf header is not" 606 pr_warn("Warning: Core image elf header is not sane\n");
613 "sane\n");
614 return -EINVAL; 607 return -EINVAL;
615 } 608 }
616 609
@@ -653,8 +646,7 @@ static int __init parse_crash_elf_headers(void)
653 if (rc < 0) 646 if (rc < 0)
654 return rc; 647 return rc;
655 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { 648 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
656 printk(KERN_WARNING "Warning: Core image elf header" 649 pr_warn("Warning: Core image elf header not found\n");
657 " not found\n");
658 return -EINVAL; 650 return -EINVAL;
659 } 651 }
660 652
@@ -673,8 +665,7 @@ static int __init parse_crash_elf_headers(void)
673 /* Determine vmcore size. */ 665 /* Determine vmcore size. */
674 vmcore_size = get_vmcore_size_elf32(elfcorebuf); 666 vmcore_size = get_vmcore_size_elf32(elfcorebuf);
675 } else { 667 } else {
676 printk(KERN_WARNING "Warning: Core image elf header is not" 668 pr_warn("Warning: Core image elf header is not sane\n");
677 " sane\n");
678 return -EINVAL; 669 return -EINVAL;
679 } 670 }
680 return 0; 671 return 0;
@@ -690,7 +681,7 @@ static int __init vmcore_init(void)
690 return rc; 681 return rc;
691 rc = parse_crash_elf_headers(); 682 rc = parse_crash_elf_headers();
692 if (rc) { 683 if (rc) {
693 printk(KERN_WARNING "Kdump: vmcore not initialized\n"); 684 pr_warn("Kdump: vmcore not initialized\n");
694 return rc; 685 return rc;
695 } 686 }
696 687
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 67de74ca85f4..e4bcb2cf055a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -418,9 +418,25 @@ static struct file_system_type pstore_fs_type = {
418 .kill_sb = pstore_kill_sb, 418 .kill_sb = pstore_kill_sb,
419}; 419};
420 420
421static struct kobject *pstore_kobj;
422
421static int __init init_pstore_fs(void) 423static int __init init_pstore_fs(void)
422{ 424{
423 return register_filesystem(&pstore_fs_type); 425 int err = 0;
426
427 /* Create a convenient mount point for people to access pstore */
428 pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
429 if (!pstore_kobj) {
430 err = -ENOMEM;
431 goto out;
432 }
433
434 err = register_filesystem(&pstore_fs_type);
435 if (err < 0)
436 kobject_put(pstore_kobj);
437
438out:
439 return err;
424} 440}
425module_init(init_pstore_fs) 441module_init(init_pstore_fs)
426 442
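
The init path above registers a kobject so /sys/fs/pstore exists as a conventional mount point, then unwinds it if filesystem registration fails. A stripped-down sketch of the same register-then-unwind shape, with hypothetical names:

static struct kobject *example_kobj;

static int __init example_fs_init(void)
{
	int err;

	example_kobj = kobject_create_and_add("example", fs_kobj);
	if (!example_kobj)
		return -ENOMEM;

	err = register_filesystem(&example_fs_type);
	if (err)
		kobject_put(example_kobj);	/* drop the mount point again */
	return err;
}
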
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
96 } 96 }
97} 97}
98 98
99bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
100{
101 /*
102 * In case of NMI path, pstore shouldn't be blocked
103 * regardless of reason.
104 */
105 if (in_nmi())
106 return true;
107
108 switch (reason) {
109 /* In panic case, other cpus are stopped by smp_send_stop(). */
110 case KMSG_DUMP_PANIC:
111 /* Emergency restart shouldn't be blocked by spin lock. */
112 case KMSG_DUMP_EMERG:
113 return true;
114 default:
115 return false;
116 }
117}
118EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
119
99/* 120/*
100 * callback from kmsg_dump. (s2,l2) has the most recently 121 * callback from kmsg_dump. (s2,l2) has the most recently
101 * written bytes, older bytes are in (s1,l1). Save as much 122 * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
114 135
115 why = get_reason_str(reason); 136 why = get_reason_str(reason);
116 137
117 if (in_nmi()) { 138 if (pstore_cannot_block_path(reason)) {
118 is_locked = spin_trylock(&psinfo->buf_lock); 139 is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
119 if (!is_locked) 140 if (!is_locked) {
120 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); 141 pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
142 , in_nmi() ? "NMI" : why);
143 }
121 } else 144 } else
122 spin_lock_irqsave(&psinfo->buf_lock, flags); 145 spin_lock_irqsave(&psinfo->buf_lock, flags);
123 oopscount++; 146 oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
143 total += hsize + len; 166 total += hsize + len;
144 part++; 167 part++;
145 } 168 }
146 if (in_nmi()) { 169 if (pstore_cannot_block_path(reason)) {
147 if (is_locked) 170 if (is_locked)
148 spin_unlock(&psinfo->buf_lock); 171 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
149 } else 172 } else
150 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 173 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
151} 174}
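
The new pstore_cannot_block_path() predicate centralizes the "may we wait for the buffer lock here?" decision that pstore_dump() previously keyed off in_nmi() alone. The calling pattern it is meant for, in sketch form (not the committed code):

	bool locked;

	if (pstore_cannot_block_path(reason)) {
		/* NMI, panic or emergency restart: never spin on the lock */
		locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
	} else {
		spin_lock_irqsave(&psinfo->buf_lock, flags);
		locked = true;
	}
	/* ... write the record, then unlock only if 'locked' ... */
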
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e74305..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
@@ -291,9 +295,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
291 kfree(cxt->przs); 295 kfree(cxt->przs);
292} 296}
293 297
294static int __devinit ramoops_init_przs(struct device *dev, 298static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
295 struct ramoops_context *cxt, 299 phys_addr_t *paddr, size_t dump_mem_sz)
296 phys_addr_t *paddr, size_t dump_mem_sz)
297{ 300{
298 int err = -ENOMEM; 301 int err = -ENOMEM;
299 int i; 302 int i;
@@ -336,10 +339,9 @@ fail_prz:
336 return err; 339 return err;
337} 340}
338 341
339static int __devinit ramoops_init_prz(struct device *dev, 342static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
340 struct ramoops_context *cxt, 343 struct persistent_ram_zone **prz,
341 struct persistent_ram_zone **prz, 344 phys_addr_t *paddr, size_t sz, u32 sig)
342 phys_addr_t *paddr, size_t sz, u32 sig)
343{ 345{
344 if (!sz) 346 if (!sz)
345 return 0; 347 return 0;
@@ -367,7 +369,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
367 return 0; 369 return 0;
368} 370}
369 371
370static int __devinit ramoops_probe(struct platform_device *pdev) 372static int ramoops_probe(struct platform_device *pdev)
371{ 373{
372 struct device *dev = &pdev->dev; 374 struct device *dev = &pdev->dev;
373 struct ramoops_platform_data *pdata = pdev->dev.platform_data; 375 struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84dd..0306303be372 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
390 return 0; 390 return 0;
391} 391}
392 392
393static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, 393static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
394 u32 sig, int ecc_size) 394 int ecc_size)
395{ 395{
396 int ret; 396 int ret;
397 397
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
443 kfree(prz); 443 kfree(prz);
444} 444}
445 445
446struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, 446struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
447 size_t size, u32 sig, 447 u32 sig, int ecc_size)
448 int ecc_size)
449{ 448{
450 struct persistent_ram_zone *prz; 449 struct persistent_ram_zone *prz;
451 int ret = -ENOMEM; 450 int ret = -ENOMEM;
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 7b0329468a5d..28ce014b3cef 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -16,7 +16,7 @@
16 16
17static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) 17static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
18{ 18{
19 struct inode *inode = filp->f_path.dentry->d_inode; 19 struct inode *inode = file_inode(filp);
20 unsigned int offset; 20 unsigned int offset;
21 struct buffer_head *bh; 21 struct buffer_head *bh;
22 struct qnx4_inode_entry *de; 22 struct qnx4_inode_entry *de;
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index dc597353db3b..8798d065e400 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -117,7 +117,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
117 117
118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) 118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
119{ 119{
120 struct inode *inode = filp->f_path.dentry->d_inode; 120 struct inode *inode = file_inode(filp);
121 struct super_block *s = inode->i_sb; 121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s); 122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); 123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */ 286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n")); 288 " endianness.\n"));
289 return bh; 289 return bh;
290 } else 290 } else
291 sbi->s_bytesex = BYTESEX_LE; 291 sbi->s_bytesex = BYTESEX_LE;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d5378d028589..8d5b438cc188 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -202,7 +202,7 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
202 unsigned long pgoff, unsigned long flags) 202 unsigned long pgoff, unsigned long flags)
203{ 203{
204 unsigned long maxpages, lpages, nr, loop, ret; 204 unsigned long maxpages, lpages, nr, loop, ret;
205 struct inode *inode = file->f_path.dentry->d_inode; 205 struct inode *inode = file_inode(file);
206 struct page **pages = NULL, **ptr, *page; 206 struct page **pages = NULL, **ptr, *page;
207 loff_t isize; 207 loff_t isize;
208 208
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eab8c09d3801..c24f1e10b946 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -260,6 +260,7 @@ static struct file_system_type ramfs_fs_type = {
260 .name = "ramfs", 260 .name = "ramfs",
261 .mount = ramfs_mount, 261 .mount = ramfs_mount,
262 .kill_sb = ramfs_kill_sb, 262 .kill_sb = ramfs_kill_sb,
263 .fs_flags = FS_USERNS_MOUNT,
263}; 264};
264static struct file_system_type rootfs_fs_type = { 265static struct file_system_type rootfs_fs_type = {
265 .name = "rootfs", 266 .name = "rootfs",
diff --git a/fs/read_write.c b/fs/read_write.c
index bb34af315280..3ae6dbe828bf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(no_llseek);
163 163
164loff_t default_llseek(struct file *file, loff_t offset, int whence) 164loff_t default_llseek(struct file *file, loff_t offset, int whence)
165{ 165{
166 struct inode *inode = file->f_path.dentry->d_inode; 166 struct inode *inode = file_inode(file);
167 loff_t retval; 167 loff_t retval;
168 168
169 mutex_lock(&inode->i_mutex); 169 mutex_lock(&inode->i_mutex);
@@ -290,7 +290,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
290 loff_t pos; 290 loff_t pos;
291 int retval = -EINVAL; 291 int retval = -EINVAL;
292 292
293 inode = file->f_path.dentry->d_inode; 293 inode = file_inode(file);
294 if (unlikely((ssize_t) count < 0)) 294 if (unlikely((ssize_t) count < 0))
295 return retval; 295 return retval;
296 pos = *ppos; 296 pos = *ppos;
@@ -901,8 +901,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
901 if (!(out.file->f_mode & FMODE_WRITE)) 901 if (!(out.file->f_mode & FMODE_WRITE))
902 goto fput_out; 902 goto fput_out;
903 retval = -EINVAL; 903 retval = -EINVAL;
904 in_inode = in.file->f_path.dentry->d_inode; 904 in_inode = file_inode(in.file);
905 out_inode = out.file->f_path.dentry->d_inode; 905 out_inode = file_inode(out.file);
906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
907 if (retval < 0) 907 if (retval < 0)
908 goto fput_out; 908 goto fput_out;
diff --git a/fs/readdir.c b/fs/readdir.c
index 5e69ef533b77..fee38e04fae4 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -22,7 +22,7 @@
22 22
23int vfs_readdir(struct file *file, filldir_t filler, void *buf) 23int vfs_readdir(struct file *file, filldir_t filler, void *buf)
24{ 24{
25 struct inode *inode = file->f_path.dentry->d_inode; 25 struct inode *inode = file_inode(file);
26 int res = -ENOTDIR; 26 int res = -ENOTDIR;
27 if (!file->f_op || !file->f_op->readdir) 27 if (!file->f_op || !file->f_op->readdir)
28 goto out; 28 goto out;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 50302d6f8895..6165bd4784f6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -268,7 +268,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
268 * new current position before returning. */ 268 * new current position before returning. */
269 ) 269 )
270{ 270{
271 struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. 271 struct inode *inode = file_inode(file); // Inode of the file that we are writing to.
272 /* To simplify coding at this time, we store 272 /* To simplify coding at this time, we store
273 locked pages in array for now */ 273 locked pages in array for now */
274 struct reiserfs_transaction_handle th; 274 struct reiserfs_transaction_handle th;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 95d7680ead47..ea5061fd4f3e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1603,10 +1603,10 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
1603 1603
1604 if (parent && (maxlen < 5)) { 1604 if (parent && (maxlen < 5)) {
1605 *lenp = 5; 1605 *lenp = 5;
1606 return 255; 1606 return FILEID_INVALID;
1607 } else if (maxlen < 3) { 1607 } else if (maxlen < 3) {
1608 *lenp = 3; 1608 *lenp = 3;
1609 return 255; 1609 return FILEID_INVALID;
1610 } 1610 }
1611 1611
1612 data[0] = inode->i_ino; 1612 data[0] = inode->i_ino;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0c2185042d5f..15cb5fe6b425 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -21,7 +21,7 @@
21 */ 21 */
22long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 22long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23{ 23{
24 struct inode *inode = filp->f_path.dentry->d_inode; 24 struct inode *inode = file_inode(filp);
25 unsigned int flags; 25 unsigned int flags;
26 int err = 0; 26 int err = 0;
27 27
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e60e87035bb3..9cc0740adffa 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -281,7 +281,7 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
281 } 281 }
282#if defined( REISERFS_USE_OIDMAPF ) 282#if defined( REISERFS_USE_OIDMAPF )
283 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { 283 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
284 loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size; 284 loff_t size = file_inode(sb_info->oidmap.mapf)->i_size;
285 total_used += size / sizeof(reiserfs_oidinterval_d_t); 285 total_used += size / sizeof(reiserfs_oidinterval_d_t);
286 } 286 }
287#endif 287#endif
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index fd7c5f60b46b..7e8d3a80bdab 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -147,7 +147,7 @@ static const struct address_space_operations romfs_aops = {
147 */ 147 */
148static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 148static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
149{ 149{
150 struct inode *i = filp->f_dentry->d_inode; 150 struct inode *i = file_inode(filp);
151 struct romfs_inode ri; 151 struct romfs_inode ri;
152 unsigned long offset, maxoff; 152 unsigned long offset, maxoff;
153 int j, ino, nextfh; 153 int j, ino, nextfh;
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 9d863fb501f9..15c6304bab71 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -296,7 +296,7 @@ EXPORT_SYMBOL(seq_read);
296 * seq_lseek - ->llseek() method for sequential files. 296 * seq_lseek - ->llseek() method for sequential files.
297 * @file: the file in question 297 * @file: the file in question
298 * @offset: new position 298 * @offset: new position
299 * @origin: 0 for absolute, 1 for relative position 299 * @whence: 0 for absolute, 1 for relative position
300 * 300 *
301 * Ready-made ->f_op->llseek() 301 * Ready-made ->f_op->llseek()
302 */ 302 */
@@ -308,27 +308,27 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
308 mutex_lock(&m->lock); 308 mutex_lock(&m->lock);
309 m->version = file->f_version; 309 m->version = file->f_version;
310 switch (whence) { 310 switch (whence) {
311 case 1: 311 case SEEK_CUR:
312 offset += file->f_pos; 312 offset += file->f_pos;
313 case 0: 313 case SEEK_SET:
314 if (offset < 0) 314 if (offset < 0)
315 break; 315 break;
316 retval = offset; 316 retval = offset;
317 if (offset != m->read_pos) { 317 if (offset != m->read_pos) {
318 while ((retval=traverse(m, offset)) == -EAGAIN) 318 while ((retval = traverse(m, offset)) == -EAGAIN)
319 ; 319 ;
320 if (retval) { 320 if (retval) {
321 /* with extreme prejudice... */ 321 /* with extreme prejudice... */
322 file->f_pos = 0; 322 file->f_pos = 0;
323 m->read_pos = 0; 323 m->read_pos = 0;
324 m->version = 0; 324 m->version = 0;
325 m->index = 0; 325 m->index = 0;
326 m->count = 0; 326 m->count = 0;
327 } else { 327 } else {
328 m->read_pos = offset; 328 m->read_pos = offset;
329 retval = file->f_pos = offset; 329 retval = file->f_pos = offset;
330 }
331 } 330 }
331 }
332 } 332 }
333 file->f_version = m->version; 333 file->f_version = m->version;
334 mutex_unlock(&m->lock); 334 mutex_unlock(&m->lock);
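
seq_lseek() now switches on the standard SEEK_SET/SEEK_CUR constants, with SEEK_CUR deliberately falling through into the SEEK_SET validation after adjusting the offset. It remains the ready-made ->llseek for seq_file users, e.g. (hypothetical fops, shown for illustration only):

static const struct file_operations foo_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = foo_proc_open,	/* hypothetical single_open() wrapper */
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
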
diff --git a/fs/splice.c b/fs/splice.c
index 8890604e3fcd..718bd0056384 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -569,7 +569,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
569 return res; 569 return res;
570} 570}
571 571
572static ssize_t kernel_write(struct file *file, const char *buf, size_t count, 572ssize_t kernel_write(struct file *file, const char *buf, size_t count,
573 loff_t pos) 573 loff_t pos)
574{ 574{
575 mm_segment_t old_fs; 575 mm_segment_t old_fs;
@@ -578,11 +578,12 @@ static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
578 old_fs = get_fs(); 578 old_fs = get_fs();
579 set_fs(get_ds()); 579 set_fs(get_ds());
580 /* The cast to a user pointer is valid due to the set_fs() */ 580 /* The cast to a user pointer is valid due to the set_fs() */
581 res = vfs_write(file, (const char __user *)buf, count, &pos); 581 res = vfs_write(file, (__force const char __user *)buf, count, &pos);
582 set_fs(old_fs); 582 set_fs(old_fs);
583 583
584 return res; 584 return res;
585} 585}
586EXPORT_SYMBOL(kernel_write);
586 587
587ssize_t default_file_splice_read(struct file *in, loff_t *ppos, 588ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
588 struct pipe_inode_info *pipe, size_t len, 589 struct pipe_inode_info *pipe, size_t len,
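
kernel_write() above loses its static and gains an EXPORT_SYMBOL, so other kernel code can write a kernel-space buffer to a struct file without doing the set_fs() dance itself. An illustrative caller, assuming a file obtained elsewhere (e.g. via filp_open()); the helper name is invented:

static ssize_t append_record(struct file *filp, const char *rec, size_t len, loff_t pos)
{
	ssize_t n = kernel_write(filp, rec, len, pos);

	if (n < 0)
		pr_err("append_record: write failed (%zd)\n", n);
	return n;
}
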
@@ -696,8 +697,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
696 return -EINVAL; 697 return -EINVAL;
697 698
698 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 699 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
699 if (sd->len < sd->total_len) 700
701 if (sd->len < sd->total_len && pipe->nrbufs > 1)
700 more |= MSG_SENDPAGE_NOTLAST; 702 more |= MSG_SENDPAGE_NOTLAST;
703
701 return file->f_op->sendpage(file, buf->page, buf->offset, 704 return file->f_op->sendpage(file, buf->page, buf->offset,
702 sd->len, &pos, more); 705 sd->len, &pos, more);
703} 706}
@@ -1168,7 +1171,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1168 * randomly drop data for eg socket -> socket splicing. Use the 1171 * randomly drop data for eg socket -> socket splicing. Use the
1169 * piped splicing for that! 1172 * piped splicing for that!
1170 */ 1173 */
1171 i_mode = in->f_path.dentry->d_inode->i_mode; 1174 i_mode = file_inode(in)->i_mode;
1172 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 1175 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
1173 return -EINVAL; 1176 return -EINVAL;
1174 1177
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index b381305c9a47..57dc70ebbb19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -102,7 +102,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
102 102
103static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) 103static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
104{ 104{
105 struct inode *inode = file->f_dentry->d_inode; 105 struct inode *inode = file_inode(file);
106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
107 u64 block = squashfs_i(inode)->start + msblk->directory_table; 107 u64 block = squashfs_i(inode)->start + msblk->directory_table;
108 int offset = squashfs_i(inode)->offset, length, dir_count, size, 108 int offset = squashfs_i(inode)->offset, length, dir_count, size,
diff --git a/fs/stat.c b/fs/stat.c
index 14f45459c83d..04ce1ac20d20 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,17 +37,17 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);
39 39
40int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 40int vfs_getattr(struct path *path, struct kstat *stat)
41{ 41{
42 struct inode *inode = dentry->d_inode; 42 struct inode *inode = path->dentry->d_inode;
43 int retval; 43 int retval;
44 44
45 retval = security_inode_getattr(mnt, dentry); 45 retval = security_inode_getattr(path->mnt, path->dentry);
46 if (retval) 46 if (retval)
47 return retval; 47 return retval;
48 48
49 if (inode->i_op->getattr) 49 if (inode->i_op->getattr)
50 return inode->i_op->getattr(mnt, dentry, stat); 50 return inode->i_op->getattr(path->mnt, path->dentry, stat);
51 51
52 generic_fillattr(inode, stat); 52 generic_fillattr(inode, stat);
53 return 0; 53 return 0;
@@ -61,8 +61,7 @@ int vfs_fstat(unsigned int fd, struct kstat *stat)
61 int error = -EBADF; 61 int error = -EBADF;
62 62
63 if (f.file) { 63 if (f.file) {
64 error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, 64 error = vfs_getattr(&f.file->f_path, stat);
65 stat);
66 fdput(f); 65 fdput(f);
67 } 66 }
68 return error; 67 return error;
@@ -89,7 +88,7 @@ retry:
89 if (error) 88 if (error)
90 goto out; 89 goto out;
91 90
92 error = vfs_getattr(path.mnt, path.dentry, stat); 91 error = vfs_getattr(&path, stat);
93 path_put(&path); 92 path_put(&path);
94 if (retry_estale(error, lookup_flags)) { 93 if (retry_estale(error, lookup_flags)) {
95 lookup_flags |= LOOKUP_REVAL; 94 lookup_flags |= LOOKUP_REVAL;
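
vfs_getattr() now takes a struct path instead of separate vfsmount/dentry arguments, which is what the vfs_fstat() and stat call sites above are converted to. A hypothetical out-of-tree caller would follow suit:

static int stat_open_file(struct file *filp, struct kstat *stat)
{
	/* old: vfs_getattr(filp->f_path.mnt, filp->f_path.dentry, stat); */
	return vfs_getattr(&filp->f_path, stat);
}
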
diff --git a/fs/super.c b/fs/super.c
index 12f123712161..7465d4364208 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type,
447 void *data) 447 void *data)
448{ 448{
449 struct super_block *s = NULL; 449 struct super_block *s = NULL;
450 struct hlist_node *node;
451 struct super_block *old; 450 struct super_block *old;
452 int err; 451 int err;
453 452
454retry: 453retry:
455 spin_lock(&sb_lock); 454 spin_lock(&sb_lock);
456 if (test) { 455 if (test) {
457 hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { 456 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
458 if (!test(old, data)) 457 if (!test(old, data))
459 continue; 458 continue;
460 if (!grab_super(old)) 459 if (!grab_super(old))
@@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type,
554 void (*f)(struct super_block *, void *), void *arg) 553 void (*f)(struct super_block *, void *), void *arg)
555{ 554{
556 struct super_block *sb, *p = NULL; 555 struct super_block *sb, *p = NULL;
557 struct hlist_node *node;
558 556
559 spin_lock(&sb_lock); 557 spin_lock(&sb_lock);
560 hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { 558 hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
561 sb->s_count++; 559 sb->s_count++;
562 spin_unlock(&sb_lock); 560 spin_unlock(&sb_lock);
563 561
@@ -842,7 +840,7 @@ int get_anon_bdev(dev_t *p)
842 else if (error) 840 else if (error)
843 return -EAGAIN; 841 return -EAGAIN;
844 842
845 if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { 843 if (dev == (1 << MINORBITS)) {
846 spin_lock(&unnamed_dev_lock); 844 spin_lock(&unnamed_dev_lock);
847 ida_remove(&unnamed_dev_ida, dev); 845 ida_remove(&unnamed_dev_ida, dev);
848 if (unnamed_dev_start > dev) 846 if (unnamed_dev_start > dev)
diff --git a/fs/sync.c b/fs/sync.c
index 14eefeb44636..2c5d6639a66a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -332,7 +332,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
332 if (!f.file) 332 if (!f.file)
333 goto out; 333 goto out;
334 334
335 i_mode = f.file->f_path.dentry->d_inode->i_mode; 335 i_mode = file_inode(f.file)->i_mode;
336 ret = -ESPIPE; 336 ret = -ESPIPE;
337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
338 !S_ISLNK(i_mode)) 338 !S_ISLNK(i_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b544880..15c68f9489ae 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -70,7 +70,7 @@ static ssize_t
70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) 70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
71{ 71{
72 struct bin_buffer *bb = file->private_data; 72 struct bin_buffer *bb = file->private_data;
73 int size = file->f_path.dentry->d_inode->i_size; 73 int size = file_inode(file)->i_size;
74 loff_t offs = *off; 74 loff_t offs = *off;
75 int count = min_t(size_t, bytes, PAGE_SIZE); 75 int count = min_t(size_t, bytes, PAGE_SIZE);
76 char *temp; 76 char *temp;
@@ -140,7 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
140 size_t bytes, loff_t *off) 140 size_t bytes, loff_t *off)
141{ 141{
142 struct bin_buffer *bb = file->private_data; 142 struct bin_buffer *bb = file->private_data;
143 int size = file->f_path.dentry->d_inode->i_size; 143 int size = file_inode(file)->i_size;
144 loff_t offs = *off; 144 loff_t offs = *off;
145 int count = min_t(size_t, bytes, PAGE_SIZE); 145 int count = min_t(size_t, bytes, PAGE_SIZE);
146 char *temp; 146 char *temp;
@@ -461,15 +461,14 @@ const struct file_operations bin_fops = {
461void unmap_bin_file(struct sysfs_dirent *attr_sd) 461void unmap_bin_file(struct sysfs_dirent *attr_sd)
462{ 462{
463 struct bin_buffer *bb; 463 struct bin_buffer *bb;
464 struct hlist_node *tmp;
465 464
466 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) 465 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
467 return; 466 return;
468 467
469 mutex_lock(&sysfs_bin_lock); 468 mutex_lock(&sysfs_bin_lock);
470 469
471 hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { 470 hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
472 struct inode *inode = bb->file->f_path.dentry->d_inode; 471 struct inode *inode = file_inode(bb->file);
473 472
474 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 473 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
475 } 474 }
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
205} 205}
206EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 206EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
207 207
208/**
209 * sysfs_add_link_to_group - add a symlink to an attribute group.
210 * @kobj: The kobject containing the group.
211 * @group_name: The name of the group.
212 * @target: The target kobject of the symlink to create.
213 * @link_name: The name of the symlink to create.
214 */
215int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
216 struct kobject *target, const char *link_name)
217{
218 struct sysfs_dirent *dir_sd;
219 int error = 0;
220
221 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
222 if (!dir_sd)
223 return -ENOENT;
224
225 error = sysfs_create_link_sd(dir_sd, target, link_name);
226 sysfs_put(dir_sd);
227
228 return error;
229}
230EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
231
232/**
233 * sysfs_remove_link_from_group - remove a symlink from an attribute group.
234 * @kobj: The kobject containing the group.
235 * @group_name: The name of the group.
236 * @link_name: The name of the symlink to remove.
237 */
238void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
239 const char *link_name)
240{
241 struct sysfs_dirent *dir_sd;
242
243 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
244 if (dir_sd) {
245 sysfs_hash_and_remove(dir_sd, NULL, link_name);
246 sysfs_put(dir_sd);
247 }
248}
249EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
208 250
209EXPORT_SYMBOL_GPL(sysfs_create_group); 251EXPORT_SYMBOL_GPL(sysfs_create_group);
210EXPORT_SYMBOL_GPL(sysfs_update_group); 252EXPORT_SYMBOL_GPL(sysfs_update_group);
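
The two new helpers let a driver hang symlinks off an existing attribute group without reaching into sysfs internals. Hypothetical usage (the group and link names are invented for illustration):

	int err;

	err = sysfs_add_link_to_group(&dev->kobj, "queues", &peer->kobj, "companion");
	if (err)
		return err;
	/* ... later, on teardown ... */
	sysfs_remove_link_from_group(&dev->kobj, "queues", "companion");
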
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db940a9be045..8d924b5ec733 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -10,7 +10,7 @@
10 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 11 */
12 12
13#define DEBUG 13#define DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
21 21
22#include "sysfs.h" 22#include "sysfs.h"
23 23
24static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 const char *name, int warn) 25 struct kobject *target,
26 const char *name, int warn)
26{ 27{
27 struct sysfs_dirent *parent_sd = NULL;
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type; 31 enum kobj_ns_type ns_type;
32 int error; 32 int error;
33 33
34 BUG_ON(!name); 34 BUG_ON(!name || !parent_sd);
35
36 if (!kobj)
37 parent_sd = &sysfs_root;
38 else
39 parent_sd = kobj->sd;
40
41 error = -EFAULT;
42 if (!parent_sd)
43 goto out_put;
44 35
45 /* target->sd can go away beneath us but is protected with 36 /* target->sd can go away beneath us but is protected with
46 * sysfs_assoc_lock. Fetch target_sd from it. 37 * sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
96} 87}
97 88
98/** 89/**
90 * sysfs_create_link_sd - create symlink to a given object.
91 * @sd: directory we're creating the link in.
92 * @target: object we're pointing to.
93 * @name: name of the symlink.
94 */
95int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
96 const char *name)
97{
98 return sysfs_do_create_link_sd(sd, target, name, 1);
99}
100
101static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
102 const char *name, int warn)
103{
104 struct sysfs_dirent *parent_sd = NULL;
105
106 if (!kobj)
107 parent_sd = &sysfs_root;
108 else
109 parent_sd = kobj->sd;
110
111 if (!parent_sd)
112 return -EFAULT;
113
114 return sysfs_do_create_link_sd(parent_sd, target, name, warn);
115}
116
117/**
99 * sysfs_create_link - create symlink between two objects. 118 * sysfs_create_link - create symlink between two objects.
100 * @kobj: object whose directory we're creating the link in. 119 * @kobj: object whose directory we're creating the link in.
101 * @target: object we're pointing to. 120 * @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
240 * symlink.c 240 * symlink.c
241 */ 241 */
242extern const struct inode_operations sysfs_symlink_inode_operations; 242extern const struct inode_operations sysfs_symlink_inode_operations;
243int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
244 const char *name);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index a77c42157620..3799e8dac3eb 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -68,7 +68,7 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
68static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) 68static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
69{ 69{
70 unsigned long pos = filp->f_pos; 70 unsigned long pos = filp->f_pos;
71 struct inode *inode = filp->f_path.dentry->d_inode; 71 struct inode *inode = file_inode(filp);
72 struct super_block *sb = inode->i_sb; 72 struct super_block *sb = inode->i_sb;
73 unsigned offset = pos & ~PAGE_CACHE_MASK; 73 unsigned offset = pos & ~PAGE_CACHE_MASK;
74 unsigned long n = pos >> PAGE_CACHE_SHIFT; 74 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d03822bbf190..0e606b12a59d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/compat.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
27struct timerfd_ctx { 28struct timerfd_ctx {
@@ -278,21 +279,17 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
278 return ufd; 279 return ufd;
279} 280}
280 281
281SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 282static int do_timerfd_settime(int ufd, int flags,
282 const struct itimerspec __user *, utmr, 283 const struct itimerspec *new,
283 struct itimerspec __user *, otmr) 284 struct itimerspec *old)
284{ 285{
285 struct fd f; 286 struct fd f;
286 struct timerfd_ctx *ctx; 287 struct timerfd_ctx *ctx;
287 struct itimerspec ktmr, kotmr;
288 int ret; 288 int ret;
289 289
290 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
291 return -EFAULT;
292
293 if ((flags & ~TFD_SETTIME_FLAGS) || 290 if ((flags & ~TFD_SETTIME_FLAGS) ||
294 !timespec_valid(&ktmr.it_value) || 291 !timespec_valid(&new->it_value) ||
295 !timespec_valid(&ktmr.it_interval)) 292 !timespec_valid(&new->it_interval))
296 return -EINVAL; 293 return -EINVAL;
297 294
298 ret = timerfd_fget(ufd, &f); 295 ret = timerfd_fget(ufd, &f);
@@ -323,27 +320,23 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
323 if (ctx->expired && ctx->tintv.tv64) 320 if (ctx->expired && ctx->tintv.tv64)
324 hrtimer_forward_now(&ctx->tmr, ctx->tintv); 321 hrtimer_forward_now(&ctx->tmr, ctx->tintv);
325 322
326 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 323 old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
327 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 324 old->it_interval = ktime_to_timespec(ctx->tintv);
328 325
329 /* 326 /*
330 * Re-program the timer to the new value ... 327 * Re-program the timer to the new value ...
331 */ 328 */
332 ret = timerfd_setup(ctx, flags, &ktmr); 329 ret = timerfd_setup(ctx, flags, new);
333 330
334 spin_unlock_irq(&ctx->wqh.lock); 331 spin_unlock_irq(&ctx->wqh.lock);
335 fdput(f); 332 fdput(f);
336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
337 return -EFAULT;
338
339 return ret; 333 return ret;
340} 334}
341 335
342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 336static int do_timerfd_gettime(int ufd, struct itimerspec *t)
343{ 337{
344 struct fd f; 338 struct fd f;
345 struct timerfd_ctx *ctx; 339 struct timerfd_ctx *ctx;
346 struct itimerspec kotmr;
347 int ret = timerfd_fget(ufd, &f); 340 int ret = timerfd_fget(ufd, &f);
348 if (ret) 341 if (ret)
349 return ret; 342 return ret;
@@ -356,11 +349,65 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
356 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; 349 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
357 hrtimer_restart(&ctx->tmr); 350 hrtimer_restart(&ctx->tmr);
358 } 351 }
359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 352 t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
360 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 353 t->it_interval = ktime_to_timespec(ctx->tintv);
361 spin_unlock_irq(&ctx->wqh.lock); 354 spin_unlock_irq(&ctx->wqh.lock);
362 fdput(f); 355 fdput(f);
356 return 0;
357}
358
359SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
360 const struct itimerspec __user *, utmr,
361 struct itimerspec __user *, otmr)
362{
363 struct itimerspec new, old;
364 int ret;
365
366 if (copy_from_user(&new, utmr, sizeof(new)))
367 return -EFAULT;
368 ret = do_timerfd_settime(ufd, flags, &new, &old);
369 if (ret)
370 return ret;
371 if (otmr && copy_to_user(otmr, &old, sizeof(old)))
372 return -EFAULT;
373
374 return ret;
375}
363 376
377SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
378{
379 struct itimerspec kotmr;
380 int ret = do_timerfd_gettime(ufd, &kotmr);
381 if (ret)
382 return ret;
364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 383 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
365} 384}
366 385
 386#ifdef CONFIG_COMPAT
387COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
388 const struct itimerspec __user *, utmr,
389 struct itimerspec __user *, otmr)
390{
391 struct itimerspec new, old;
392 int ret;
393
394 if (get_compat_itimerspec(&new, utmr))
395 return -EFAULT;
396 ret = do_timerfd_settime(ufd, flags, &new, &old);
397 if (ret)
398 return ret;
399 if (otmr && put_compat_itimerspec(otmr, &old))
400 return -EFAULT;
401 return ret;
402}
403
404COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
405 struct itimerspec __user *, otmr)
406{
407 struct itimerspec kotmr;
408 int ret = do_timerfd_gettime(ufd, &kotmr);
409 if (ret)
410 return ret;
 411 return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0;
412}
413#endif
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 12817ffc7345..7f60e900edff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2459,7 +2459,7 @@ error_dump:
2459 2459
2460static inline int chance(unsigned int n, unsigned int out_of) 2460static inline int chance(unsigned int n, unsigned int out_of)
2461{ 2461{
2462 return !!((random32() % out_of) + 1 <= n); 2462 return !!((prandom_u32() % out_of) + 1 <= n);
2463 2463
2464} 2464}
2465 2465
@@ -2477,13 +2477,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2477 if (chance(1, 2)) { 2477 if (chance(1, 2)) {
2478 d->pc_delay = 1; 2478 d->pc_delay = 1;
 2479 /* Fail within 1 minute */ 2479 /* Fail within 1 minute */
2480 delay = random32() % 60000; 2480 delay = prandom_u32() % 60000;
2481 d->pc_timeout = jiffies; 2481 d->pc_timeout = jiffies;
2482 d->pc_timeout += msecs_to_jiffies(delay); 2482 d->pc_timeout += msecs_to_jiffies(delay);
2483 ubifs_warn("failing after %lums", delay); 2483 ubifs_warn("failing after %lums", delay);
2484 } else { 2484 } else {
2485 d->pc_delay = 2; 2485 d->pc_delay = 2;
2486 delay = random32() % 10000; 2486 delay = prandom_u32() % 10000;
2487 /* Fail within 10000 operations */ 2487 /* Fail within 10000 operations */
2488 d->pc_cnt_max = delay; 2488 d->pc_cnt_max = delay;
2489 ubifs_warn("failing after %lu calls", delay); 2489 ubifs_warn("failing after %lu calls", delay);
@@ -2563,7 +2563,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
2563 unsigned int from, to, ffs = chance(1, 2); 2563 unsigned int from, to, ffs = chance(1, 2);
2564 unsigned char *p = (void *)buf; 2564 unsigned char *p = (void *)buf;
2565 2565
2566 from = random32() % (len + 1); 2566 from = prandom_u32() % (len + 1);
2567 /* Corruption may only span one max. write unit */ 2567 /* Corruption may only span one max. write unit */
2568 to = min(len, ALIGN(from, c->max_write_size)); 2568 to = min(len, ALIGN(from, c->max_write_size));
2569 2569
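
The random32() -> prandom_u32() conversion above is mechanical: both return a pseudo-random u32, and the debug code keeps reducing it with % to the range it needs. A minimal sketch of the same idiom, assuming only prandom_u32() from <linux/random.h> and msecs_to_jiffies(); the helper name is hypothetical.

/* Sketch: derive a random power-cut timeout the way power_cut_emulated() does. */
#include <linux/random.h>
#include <linux/jiffies.h>

static unsigned long random_pc_timeout(void)
{
        unsigned long delay = prandom_u32() % 60000;    /* within 1 minute */

        return jiffies + msecs_to_jiffies(delay);
}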
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 8a574776a493..de08c92f2e23 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -352,7 +352,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
352 struct qstr nm; 352 struct qstr nm;
353 union ubifs_key key; 353 union ubifs_key key;
354 struct ubifs_dent_node *dent; 354 struct ubifs_dent_node *dent;
355 struct inode *dir = file->f_path.dentry->d_inode; 355 struct inode *dir = file_inode(file);
356 struct ubifs_info *c = dir->i_sb->s_fs_info; 356 struct ubifs_info *c = dir->i_sb->s_fs_info;
357 357
358 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); 358 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..f12189d2db1d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1444,7 +1444,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1444 struct vm_fault *vmf) 1444 struct vm_fault *vmf)
1445{ 1445{
1446 struct page *page = vmf->page; 1446 struct page *page = vmf->page;
1447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1447 struct inode *inode = file_inode(vma->vm_file);
1448 struct ubifs_info *c = inode->i_sb->s_fs_info; 1448 struct ubifs_info *c = inode->i_sb->s_fs_info;
1449 struct timespec now = ubifs_current_time(inode); 1449 struct timespec now = ubifs_current_time(inode);
1450 struct ubifs_budget_req req = { .new_page = 1 }; 1450 struct ubifs_budget_req req = { .new_page = 1 };
@@ -1522,6 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1522 ubifs_release_dirty_inode_budget(c, ui); 1522 ubifs_release_dirty_inode_budget(c, ui);
1523 } 1523 }
1524 1524
1525 wait_for_stable_page(page);
1525 unlock_page(page); 1526 unlock_page(page);
1526 return 0; 1527 return 0;
1527 1528
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 1a7e2d8bdbe9..648b143606cc 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -147,7 +147,7 @@ out_unlock:
147long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 147long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
148{ 148{
149 int flags, err; 149 int flags, err;
150 struct inode *inode = file->f_path.dentry->d_inode; 150 struct inode *inode = file_inode(file);
151 151
152 switch (cmd) { 152 switch (cmd) {
153 case FS_IOC_GETFLAGS: 153 case FS_IOC_GETFLAGS:
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 9daaeef675dd..4b826abb1528 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -2007,28 +2007,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
2007 2007
2008 if (!dbg_is_chk_gen(c)) 2008 if (!dbg_is_chk_gen(c))
2009 return 0; 2009 return 0;
2010 if (random32() & 3) 2010 if (prandom_u32() & 3)
2011 return 0; 2011 return 0;
2012 2012
2013 for (i = 0; i < c->lsave_cnt; i++) 2013 for (i = 0; i < c->lsave_cnt; i++)
2014 c->lsave[i] = c->main_first; 2014 c->lsave[i] = c->main_first;
2015 2015
2016 list_for_each_entry(lprops, &c->empty_list, list) 2016 list_for_each_entry(lprops, &c->empty_list, list)
2017 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2017 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2018 list_for_each_entry(lprops, &c->freeable_list, list) 2018 list_for_each_entry(lprops, &c->freeable_list, list)
2019 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2019 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2020 list_for_each_entry(lprops, &c->frdi_idx_list, list) 2020 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2021 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2021 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2022 2022
2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; 2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2024 for (i = 0; i < heap->cnt; i++) 2024 for (i = 0; i < heap->cnt; i++)
2025 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2025 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1]; 2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2027 for (i = 0; i < heap->cnt; i++) 2027 for (i = 0; i < heap->cnt; i++)
2028 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2028 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2029 heap = &c->lpt_heap[LPROPS_FREE - 1]; 2029 heap = &c->lpt_heap[LPROPS_FREE - 1];
2030 for (i = 0; i < heap->cnt; i++) 2030 for (i = 0; i < heap->cnt; i++)
2031 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2031 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2032 2032
2033 return 1; 2033 return 1;
2034} 2034}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 769701ccb5c9..ba32da3fe08a 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -126,13 +126,14 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
126 else if (inum > o->inum) 126 else if (inum > o->inum)
127 p = p->rb_right; 127 p = p->rb_right;
128 else { 128 else {
129 if (o->dnext) { 129 if (o->del) {
130 spin_unlock(&c->orphan_lock); 130 spin_unlock(&c->orphan_lock);
131 dbg_gen("deleted twice ino %lu", 131 dbg_gen("deleted twice ino %lu",
132 (unsigned long)inum); 132 (unsigned long)inum);
133 return; 133 return;
134 } 134 }
135 if (o->cnext) { 135 if (o->cmt) {
136 o->del = 1;
136 o->dnext = c->orph_dnext; 137 o->dnext = c->orph_dnext;
137 c->orph_dnext = o; 138 c->orph_dnext = o;
138 spin_unlock(&c->orphan_lock); 139 spin_unlock(&c->orphan_lock);
@@ -172,7 +173,9 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
172 last = &c->orph_cnext; 173 last = &c->orph_cnext;
173 list_for_each_entry(orphan, &c->orph_new, new_list) { 174 list_for_each_entry(orphan, &c->orph_new, new_list) {
174 ubifs_assert(orphan->new); 175 ubifs_assert(orphan->new);
176 ubifs_assert(!orphan->cmt);
175 orphan->new = 0; 177 orphan->new = 0;
178 orphan->cmt = 1;
176 *last = orphan; 179 *last = orphan;
177 last = &orphan->cnext; 180 last = &orphan->cnext;
178 } 181 }
@@ -299,7 +302,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
299 cnext = c->orph_cnext; 302 cnext = c->orph_cnext;
300 for (i = 0; i < cnt; i++) { 303 for (i = 0; i < cnt; i++) {
301 orphan = cnext; 304 orphan = cnext;
305 ubifs_assert(orphan->cmt);
302 orph->inos[i] = cpu_to_le64(orphan->inum); 306 orph->inos[i] = cpu_to_le64(orphan->inum);
307 orphan->cmt = 0;
303 cnext = orphan->cnext; 308 cnext = orphan->cnext;
304 orphan->cnext = NULL; 309 orphan->cnext = NULL;
305 } 310 }
@@ -378,6 +383,7 @@ static int consolidate(struct ubifs_info *c)
378 list_for_each_entry(orphan, &c->orph_list, list) { 383 list_for_each_entry(orphan, &c->orph_list, list) {
379 if (orphan->new) 384 if (orphan->new)
380 continue; 385 continue;
386 orphan->cmt = 1;
381 *last = orphan; 387 *last = orphan;
382 last = &orphan->cnext; 388 last = &orphan->cnext;
383 cnt += 1; 389 cnt += 1;
@@ -442,6 +448,7 @@ static void erase_deleted(struct ubifs_info *c)
442 orphan = dnext; 448 orphan = dnext;
443 dnext = orphan->dnext; 449 dnext = orphan->dnext;
444 ubifs_assert(!orphan->new); 450 ubifs_assert(!orphan->new);
451 ubifs_assert(orphan->del);
445 rb_erase(&orphan->rb, &c->orph_tree); 452 rb_erase(&orphan->rb, &c->orph_tree);
446 list_del(&orphan->list); 453 list_del(&orphan->list);
447 c->tot_orphans -= 1; 454 c->tot_orphans -= 1;
@@ -531,6 +538,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
531 rb_link_node(&orphan->rb, parent, p); 538 rb_link_node(&orphan->rb, parent, p);
532 rb_insert_color(&orphan->rb, &c->orph_tree); 539 rb_insert_color(&orphan->rb, &c->orph_tree);
533 list_add_tail(&orphan->list, &c->orph_list); 540 list_add_tail(&orphan->list, &c->orph_list);
541 orphan->del = 1;
534 orphan->dnext = c->orph_dnext; 542 orphan->dnext = c->orph_dnext;
535 c->orph_dnext = orphan; 543 c->orph_dnext = orphan;
536 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, 544 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 523bbad69c0c..52a6559275c4 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -683,7 +683,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
683 c->ilebs[c->ileb_cnt++] = lnum; 683 c->ilebs[c->ileb_cnt++] = lnum;
684 dbg_cmt("LEB %d", lnum); 684 dbg_cmt("LEB %d", lnum);
685 } 685 }
686 if (dbg_is_chk_index(c) && !(random32() & 7)) 686 if (dbg_is_chk_index(c) && !(prandom_u32() & 7))
687 return -ENOSPC; 687 return -ENOSPC;
688 return 0; 688 return 0;
689} 689}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d133c276fe05..b2babce4d70f 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -904,6 +904,8 @@ struct ubifs_budget_req {
904 * @dnext: next orphan to delete 904 * @dnext: next orphan to delete
905 * @inum: inode number 905 * @inum: inode number
906 * @new: %1 => added since the last commit, otherwise %0 906 * @new: %1 => added since the last commit, otherwise %0
907 * @cmt: %1 => commit pending, otherwise %0
908 * @del: %1 => delete pending, otherwise %0
907 */ 909 */
908struct ubifs_orphan { 910struct ubifs_orphan {
909 struct rb_node rb; 911 struct rb_node rb;
@@ -912,7 +914,9 @@ struct ubifs_orphan {
912 struct ubifs_orphan *cnext; 914 struct ubifs_orphan *cnext;
913 struct ubifs_orphan *dnext; 915 struct ubifs_orphan *dnext;
914 ino_t inum; 916 ino_t inum;
915 int new; 917 unsigned new:1;
918 unsigned cmt:1;
919 unsigned del:1;
916}; 920};
917 921
918/** 922/**
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index eb8bfe2b89a5..b3e93f5e17c3 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -186,7 +186,7 @@ out:
186 186
187static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) 187static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
188{ 188{
189 struct inode *dir = filp->f_path.dentry->d_inode; 189 struct inode *dir = file_inode(filp);
190 int result; 190 int result;
191 191
192 if (filp->f_pos == 0) { 192 if (filp->f_pos == 0) {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 77b5953eaac8..29569dd08168 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -139,7 +139,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
139{ 139{
140 ssize_t retval; 140 ssize_t retval;
141 struct file *file = iocb->ki_filp; 141 struct file *file = iocb->ki_filp;
142 struct inode *inode = file->f_path.dentry->d_inode; 142 struct inode *inode = file_inode(file);
143 int err, pos; 143 int err, pos;
144 size_t count = iocb->ki_left; 144 size_t count = iocb->ki_left;
145 struct udf_inode_info *iinfo = UDF_I(inode); 145 struct udf_inode_info *iinfo = UDF_I(inode);
@@ -178,7 +178,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
178 178
179long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 179long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
180{ 180{
181 struct inode *inode = filp->f_dentry->d_inode; 181 struct inode *inode = file_inode(filp);
182 long old_block, new_block; 182 long old_block, new_block;
183 int result = -EINVAL; 183 int result = -EINVAL;
184 184
@@ -204,7 +204,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
204 goto out; 204 goto out;
205 case UDF_RELOCATE_BLOCKS: 205 case UDF_RELOCATE_BLOCKS:
206 if (!capable(CAP_SYS_ADMIN)) { 206 if (!capable(CAP_SYS_ADMIN)) {
207 result = -EACCES; 207 result = -EPERM;
208 goto out; 208 goto out;
209 } 209 }
210 if (get_user(old_block, (long __user *)arg)) { 210 if (get_user(old_block, (long __user *)arg)) {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index cbae1ed0b7c1..7a12e48ad819 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -67,6 +67,74 @@ static void udf_update_extents(struct inode *,
67 struct extent_position *); 67 struct extent_position *);
68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
69 69
70static void __udf_clear_extent_cache(struct inode *inode)
71{
72 struct udf_inode_info *iinfo = UDF_I(inode);
73
74 if (iinfo->cached_extent.lstart != -1) {
75 brelse(iinfo->cached_extent.epos.bh);
76 iinfo->cached_extent.lstart = -1;
77 }
78}
79
80/* Invalidate extent cache */
81static void udf_clear_extent_cache(struct inode *inode)
82{
83 struct udf_inode_info *iinfo = UDF_I(inode);
84
85 spin_lock(&iinfo->i_extent_cache_lock);
86 __udf_clear_extent_cache(inode);
87 spin_unlock(&iinfo->i_extent_cache_lock);
88}
89
90/* Return contents of extent cache */
91static int udf_read_extent_cache(struct inode *inode, loff_t bcount,
92 loff_t *lbcount, struct extent_position *pos)
93{
94 struct udf_inode_info *iinfo = UDF_I(inode);
95 int ret = 0;
96
97 spin_lock(&iinfo->i_extent_cache_lock);
98 if ((iinfo->cached_extent.lstart <= bcount) &&
99 (iinfo->cached_extent.lstart != -1)) {
100 /* Cache hit */
101 *lbcount = iinfo->cached_extent.lstart;
102 memcpy(pos, &iinfo->cached_extent.epos,
103 sizeof(struct extent_position));
104 if (pos->bh)
105 get_bh(pos->bh);
106 ret = 1;
107 }
108 spin_unlock(&iinfo->i_extent_cache_lock);
109 return ret;
110}
111
112/* Add extent to extent cache */
113static void udf_update_extent_cache(struct inode *inode, loff_t estart,
114 struct extent_position *pos, int next_epos)
115{
116 struct udf_inode_info *iinfo = UDF_I(inode);
117
118 spin_lock(&iinfo->i_extent_cache_lock);
119 /* Invalidate previously cached extent */
120 __udf_clear_extent_cache(inode);
121 if (pos->bh)
122 get_bh(pos->bh);
123 memcpy(&iinfo->cached_extent.epos, pos,
124 sizeof(struct extent_position));
125 iinfo->cached_extent.lstart = estart;
126 if (next_epos)
127 switch (iinfo->i_alloc_type) {
128 case ICBTAG_FLAG_AD_SHORT:
129 iinfo->cached_extent.epos.offset -=
130 sizeof(struct short_ad);
131 break;
132 case ICBTAG_FLAG_AD_LONG:
133 iinfo->cached_extent.epos.offset -=
134 sizeof(struct long_ad);
135 }
136 spin_unlock(&iinfo->i_extent_cache_lock);
137}
70 138
71void udf_evict_inode(struct inode *inode) 139void udf_evict_inode(struct inode *inode)
72{ 140{
@@ -90,6 +158,7 @@ void udf_evict_inode(struct inode *inode)
90 } 158 }
91 kfree(iinfo->i_ext.i_data); 159 kfree(iinfo->i_ext.i_data);
92 iinfo->i_ext.i_data = NULL; 160 iinfo->i_ext.i_data = NULL;
161 udf_clear_extent_cache(inode);
93 if (want_delete) { 162 if (want_delete) {
94 udf_free_inode(inode); 163 udf_free_inode(inode);
95 } 164 }
@@ -105,6 +174,7 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
105 truncate_pagecache(inode, to, isize); 174 truncate_pagecache(inode, to, isize);
106 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 175 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
107 down_write(&iinfo->i_data_sem); 176 down_write(&iinfo->i_data_sem);
177 udf_clear_extent_cache(inode);
108 udf_truncate_extents(inode); 178 udf_truncate_extents(inode);
109 up_write(&iinfo->i_data_sem); 179 up_write(&iinfo->i_data_sem);
110 } 180 }
@@ -372,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
372 iinfo->i_next_alloc_goal++; 442 iinfo->i_next_alloc_goal++;
373 } 443 }
374 444
375 445 udf_clear_extent_cache(inode);
376 phys = inode_getblk(inode, block, &err, &new); 446 phys = inode_getblk(inode, block, &err, &new);
377 if (!phys) 447 if (!phys)
378 goto abort; 448 goto abort;
@@ -1171,6 +1241,7 @@ set_size:
1171 } else { 1241 } else {
1172 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1242 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1173 down_write(&iinfo->i_data_sem); 1243 down_write(&iinfo->i_data_sem);
1244 udf_clear_extent_cache(inode);
1174 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize, 1245 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize,
1175 0x00, bsize - newsize - 1246 0x00, bsize - newsize -
1176 udf_file_entry_alloc_offset(inode)); 1247 udf_file_entry_alloc_offset(inode));
@@ -1184,6 +1255,7 @@ set_size:
1184 if (err) 1255 if (err)
1185 return err; 1256 return err;
1186 down_write(&iinfo->i_data_sem); 1257 down_write(&iinfo->i_data_sem);
1258 udf_clear_extent_cache(inode);
1187 truncate_setsize(inode, newsize); 1259 truncate_setsize(inode, newsize);
1188 udf_truncate_extents(inode); 1260 udf_truncate_extents(inode);
1189 up_write(&iinfo->i_data_sem); 1261 up_write(&iinfo->i_data_sem);
@@ -2156,11 +2228,12 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
2156 struct udf_inode_info *iinfo; 2228 struct udf_inode_info *iinfo;
2157 2229
2158 iinfo = UDF_I(inode); 2230 iinfo = UDF_I(inode);
2159 pos->offset = 0; 2231 if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) {
2160 pos->block = iinfo->i_location; 2232 pos->offset = 0;
2161 pos->bh = NULL; 2233 pos->block = iinfo->i_location;
2234 pos->bh = NULL;
2235 }
2162 *elen = 0; 2236 *elen = 0;
2163
2164 do { 2237 do {
2165 etype = udf_next_aext(inode, pos, eloc, elen, 1); 2238 etype = udf_next_aext(inode, pos, eloc, elen, 1);
2166 if (etype == -1) { 2239 if (etype == -1) {
@@ -2170,7 +2243,8 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
2170 } 2243 }
2171 lbcount += *elen; 2244 lbcount += *elen;
2172 } while (lbcount <= bcount); 2245 } while (lbcount <= bcount);
2173 2246 /* update extent cache */
2247 udf_update_extent_cache(inode, lbcount - *elen, pos, 1);
2174 *offset = (bcount + *elen - lbcount) >> blocksize_bits; 2248 *offset = (bcount + *elen - lbcount) >> blocksize_bits;
2175 2249
2176 return etype; 2250 return etype;
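
The new per-inode extent cache lets inode_bmap() resume from the last extent it looked up instead of re-walking the whole extent list for every block. A simplified sketch of the cached lookup flow as wired above; function and field names are taken from the patch, while the offset computation, EOF bookkeeping and i_data_sem locking are elided.

/* Simplified sketch of the cached lookup path in inode_bmap(). */
static int8_t bmap_with_cache(struct inode *inode, loff_t bcount,
                              struct extent_position *pos,
                              struct kernel_lb_addr *eloc, uint32_t *elen)
{
        loff_t lbcount = 0;
        int8_t etype;

        if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) {
                /* cache miss: start from the inode's first extent */
                pos->offset = 0;
                pos->block = UDF_I(inode)->i_location;
                pos->bh = NULL;
        }
        *elen = 0;
        do {
                etype = udf_next_aext(inode, pos, eloc, elen, 1);
                if (etype == -1)
                        return -1;              /* walked past the last extent */
                lbcount += *elen;
        } while (lbcount <= bcount);

        /* remember where this extent starts so the next lookup resumes here */
        udf_update_extent_cache(inode, lbcount - *elen, pos, 1);
        return etype;
}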
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 95fee278ab9d..102c072c6bbf 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1270,10 +1270,10 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
1270 1270
1271 if (parent && (len < 5)) { 1271 if (parent && (len < 5)) {
1272 *lenp = 5; 1272 *lenp = 5;
1273 return 255; 1273 return FILEID_INVALID;
1274 } else if (len < 3) { 1274 } else if (len < 3) {
1275 *lenp = 3; 1275 *lenp = 3;
1276 return 255; 1276 return FILEID_INVALID;
1277 } 1277 }
1278 1278
1279 *lenp = 3; 1279 *lenp = 3;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d44fb568abe1..bc5b30a819e8 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -134,6 +134,8 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
134 ei->i_next_alloc_goal = 0; 134 ei->i_next_alloc_goal = 0;
135 ei->i_strat4096 = 0; 135 ei->i_strat4096 = 0;
136 init_rwsem(&ei->i_data_sem); 136 init_rwsem(&ei->i_data_sem);
137 ei->cached_extent.lstart = -1;
138 spin_lock_init(&ei->i_extent_cache_lock);
137 139
138 return &ei->vfs_inode; 140 return &ei->vfs_inode;
139} 141}
@@ -307,7 +309,8 @@ static void udf_sb_free_partitions(struct super_block *sb)
307{ 309{
308 struct udf_sb_info *sbi = UDF_SB(sb); 310 struct udf_sb_info *sbi = UDF_SB(sb);
309 int i; 311 int i;
310 312 if (sbi->s_partmaps == NULL)
313 return;
311 for (i = 0; i < sbi->s_partitions; i++) 314 for (i = 0; i < sbi->s_partitions; i++)
312 udf_free_partition(&sbi->s_partmaps[i]); 315 udf_free_partition(&sbi->s_partmaps[i]);
313 kfree(sbi->s_partmaps); 316 kfree(sbi->s_partmaps);
@@ -1020,7 +1023,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
1020 if (bitmap == NULL) 1023 if (bitmap == NULL)
1021 return NULL; 1024 return NULL;
1022 1025
1023 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
1024 bitmap->s_nr_groups = nr_groups; 1026 bitmap->s_nr_groups = nr_groups;
1025 return bitmap; 1027 return bitmap;
1026} 1028}
@@ -1078,8 +1080,6 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1078 if (!bitmap) 1080 if (!bitmap)
1079 return 1; 1081 return 1;
1080 map->s_uspace.s_bitmap = bitmap; 1082 map->s_uspace.s_bitmap = bitmap;
1081 bitmap->s_extLength = le32_to_cpu(
1082 phd->unallocSpaceBitmap.extLength);
1083 bitmap->s_extPosition = le32_to_cpu( 1083 bitmap->s_extPosition = le32_to_cpu(
1084 phd->unallocSpaceBitmap.extPosition); 1084 phd->unallocSpaceBitmap.extPosition);
1085 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; 1085 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
@@ -1114,8 +1114,6 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1114 if (!bitmap) 1114 if (!bitmap)
1115 return 1; 1115 return 1;
1116 map->s_fspace.s_bitmap = bitmap; 1116 map->s_fspace.s_bitmap = bitmap;
1117 bitmap->s_extLength = le32_to_cpu(
1118 phd->freedSpaceBitmap.extLength);
1119 bitmap->s_extPosition = le32_to_cpu( 1117 bitmap->s_extPosition = le32_to_cpu(
1120 phd->freedSpaceBitmap.extPosition); 1118 phd->freedSpaceBitmap.extPosition);
1121 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; 1119 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
@@ -1865,6 +1863,8 @@ static void udf_open_lvid(struct super_block *sb)
1865 mark_buffer_dirty(bh); 1863 mark_buffer_dirty(bh);
1866 sbi->s_lvid_dirty = 0; 1864 sbi->s_lvid_dirty = 0;
1867 mutex_unlock(&sbi->s_alloc_mutex); 1865 mutex_unlock(&sbi->s_alloc_mutex);
1866 /* Make opening of filesystem visible on the media immediately */
1867 sync_dirty_buffer(bh);
1868} 1868}
1869 1869
1870static void udf_close_lvid(struct super_block *sb) 1870static void udf_close_lvid(struct super_block *sb)
@@ -1905,6 +1905,8 @@ static void udf_close_lvid(struct super_block *sb)
1905 mark_buffer_dirty(bh); 1905 mark_buffer_dirty(bh);
1906 sbi->s_lvid_dirty = 0; 1906 sbi->s_lvid_dirty = 0;
1907 mutex_unlock(&sbi->s_alloc_mutex); 1907 mutex_unlock(&sbi->s_alloc_mutex);
1908 /* Make closing of filesystem visible on the media immediately */
1909 sync_dirty_buffer(bh);
1908} 1910}
1909 1911
1910u64 lvid_get_unique_id(struct super_block *sb) 1912u64 lvid_get_unique_id(struct super_block *sb)
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index bb8309dcd5c1..b5cd8ed2aa12 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,19 @@
1#ifndef _UDF_I_H 1#ifndef _UDF_I_H
2#define _UDF_I_H 2#define _UDF_I_H
3 3
4struct extent_position {
5 struct buffer_head *bh;
6 uint32_t offset;
7 struct kernel_lb_addr block;
8};
9
10struct udf_ext_cache {
11 /* Extent position */
12 struct extent_position epos;
13 /* Start logical offset in bytes */
14 loff_t lstart;
15};
16
4/* 17/*
5 * The i_data_sem and i_mutex serve for protection of allocation information 18 * The i_data_sem and i_mutex serve for protection of allocation information
6 * of a regular files and symlinks. This includes all extents belonging to 19 * of a regular files and symlinks. This includes all extents belonging to
@@ -35,6 +48,9 @@ struct udf_inode_info {
35 __u8 *i_data; 48 __u8 *i_data;
36 } i_ext; 49 } i_ext;
37 struct rw_semaphore i_data_sem; 50 struct rw_semaphore i_data_sem;
51 struct udf_ext_cache cached_extent;
52 /* Spinlock for protecting extent cache */
53 spinlock_t i_extent_cache_lock;
38 struct inode vfs_inode; 54 struct inode vfs_inode;
39}; 55};
40 56
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 5f027227f085..ed401e94aa8c 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -80,10 +80,9 @@ struct udf_virtual_data {
80}; 80};
81 81
82struct udf_bitmap { 82struct udf_bitmap {
83 __u32 s_extLength;
84 __u32 s_extPosition; 83 __u32 s_extPosition;
85 __u16 s_nr_groups; 84 int s_nr_groups;
86 struct buffer_head **s_block_bitmap; 85 struct buffer_head *s_block_bitmap[0];
87}; 86};
88 87
89struct udf_part_map { 88struct udf_part_map {
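
The struct change above replaces a manually positioned pointer (previously set via "bitmap + 1" in udf_sb_alloc_bitmap()) with a zero-length trailing array, so a single allocation covers the header and the per-group buffer_head pointers. A minimal sketch of the allocation side under that assumption; the helper name is hypothetical.

/* Hypothetical helper sketching the single-allocation layout that the
 * s_block_bitmap[0] member implies. */
#include <linux/vmalloc.h>

static struct udf_bitmap *udf_bitmap_alloc(int nr_groups)
{
        struct udf_bitmap *bitmap;
        size_t size = sizeof(*bitmap) +
                      nr_groups * sizeof(struct buffer_head *);

        bitmap = vzalloc(size);         /* kzalloc() would also do for small sizes */
        if (!bitmap)
                return NULL;

        bitmap->s_nr_groups = nr_groups;
        /* bitmap->s_block_bitmap[i] now lives inside the same allocation,
         * so no separate pointer fix-up is needed. */
        return bitmap;
}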
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index de038da6f6bd..be7dabbbcb49 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -113,11 +113,6 @@ struct ustr {
113 uint8_t u_len; 113 uint8_t u_len;
114}; 114};
115 115
116struct extent_position {
117 struct buffer_head *bh;
118 uint32_t offset;
119 struct kernel_lb_addr block;
120};
121 116
122/* super.c */ 117/* super.c */
123 118
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index e4f10a40768a..0bf6e16f8d79 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -29,7 +29,7 @@ config UFS_FS
29 29
30config UFS_FS_WRITE 30config UFS_FS_WRITE
31 bool "UFS file system write support (DANGEROUS)" 31 bool "UFS file system write support (DANGEROUS)"
32 depends on UFS_FS && EXPERIMENTAL 32 depends on UFS_FS
33 help 33 help
34 Say Y here if you want to try writing to UFS partitions. This is 34 Say Y here if you want to try writing to UFS partitions. This is
35 experimental, so you should back up your UFS partitions beforehand. 35 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index dbc90994715a..3a75ca09c506 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -433,7 +433,7 @@ static int
433ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) 433ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
434{ 434{
435 loff_t pos = filp->f_pos; 435 loff_t pos = filp->f_pos;
436 struct inode *inode = filp->f_path.dentry->d_inode; 436 struct inode *inode = file_inode(filp);
437 struct super_block *sb = inode->i_sb; 437 struct super_block *sb = inode->i_sb;
438 unsigned int offset = pos & ~PAGE_CACHE_MASK; 438 unsigned int offset = pos & ~PAGE_CACHE_MASK;
439 unsigned long n = pos >> PAGE_CACHE_SHIFT; 439 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5a7ffe54f5d5..cc33aaf219f1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -70,8 +70,8 @@ config XFS_RT
70 If unsure, say N. 70 If unsure, say N.
71 71
72config XFS_DEBUG 72config XFS_DEBUG
73 bool "XFS Debugging support (EXPERIMENTAL)" 73 bool "XFS Debugging support"
74 depends on XFS_FS && EXPERIMENTAL 74 depends on XFS_FS
75 help 75 help
76 Say Y here to get an XFS build with many debugging features, 76 Say Y here to get an XFS build with many debugging features,
77 including ASSERT checks, function wrappers around macros, 77 including ASSERT checks, function wrappers around macros,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
1925 targs.mp = mp; 1925 targs.mp = mp;
1926 targs.agbp = agbp; 1926 targs.agbp = agbp;
1927 targs.agno = args->agno; 1927 targs.agno = args->agno;
1928 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929 targs.minalignslop = 0;
1930 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1928 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931 targs.type = XFS_ALLOCTYPE_THIS_AG; 1929 targs.type = XFS_ALLOCTYPE_THIS_AG;
1932 targs.pag = pag; 1930 targs.pag = pag;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
86 } 86 }
87 87
88 if (ioend->io_iocb) { 88 if (ioend->io_iocb) {
89 inode_dio_done(ioend->io_inode);
89 if (ioend->io_isasync) { 90 if (ioend->io_isasync) {
90 aio_complete(ioend->io_iocb, ioend->io_error ? 91 aio_complete(ioend->io_iocb, ioend->io_error ?
91 ioend->io_error : ioend->io_result, 0); 92 ioend->io_error : ioend->io_result, 0);
92 } 93 }
93 inode_dio_done(ioend->io_inode);
94 } 94 }
95 95
96 mempool_free(ioend, xfs_ioend_pool); 96 mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
300 if (rsvd) 300 if (rsvd)
301 args.trans->t_flags |= XFS_TRANS_RESERVE; 301 args.trans->t_flags |= XFS_TRANS_RESERVE;
302 302
303 if ((error = xfs_trans_reserve(args.trans, args.total, 303 error = xfs_trans_reserve(args.trans, args.total,
304 XFS_ATTRSET_LOG_RES(mp, args.total), 0, 304 XFS_ATTRSETM_LOG_RES(mp) +
305 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { 305 XFS_ATTRSETRT_LOG_RES(mp) * args.total,
306 0, XFS_TRANS_PERM_LOG_RES,
307 XFS_ATTRSET_LOG_COUNT);
308 if (error) {
306 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
307 return(error); 310 return(error);
308 } 311 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */ 148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 int whichfork); /* data or attr fork */ 150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
151 154
152/* 155/*
153 * Search the extents list for the inode, for the extent containing bno. 156 * Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
357} 360}
358 361
359/* 362/*
360 * Called from xfs_bmap_add_attrfork to handle local format files. 363 * Block initialisation functions for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files,
365 * but for now they are too simple to worry about.
366 */
367STATIC void
368xfs_bmap_local_to_extents_init_fn(
369 struct xfs_buf *bp,
370 struct xfs_inode *ip,
371 struct xfs_ifork *ifp)
372{
373 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375}
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386}
387
388/*
389 * Called from xfs_bmap_add_attrfork to handle local format files. Each
390 * different data fork content type needs a different callout to do the
391 * conversion. Some are basic and only require special block initialisation
 392 * callouts for the data formatting, others (directories) are so specialised they
393 * handle everything themselves.
394 *
395 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex
 397 * formatter. It would also require passing the transaction through to the init
398 * function.
361 */ 399 */
362STATIC int /* error */ 400STATIC int /* error */
363xfs_bmap_add_attrfork_local( 401xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
368 int *flags) /* inode logging flags */ 406 int *flags) /* inode logging flags */
369{ 407{
370 xfs_da_args_t dargs; /* args for dir/attr code */ 408 xfs_da_args_t dargs; /* args for dir/attr code */
371 int error; /* error return value */
372 xfs_mount_t *mp; /* mount structure pointer */
373 409
374 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 410 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
375 return 0; 411 return 0;
412
376 if (S_ISDIR(ip->i_d.di_mode)) { 413 if (S_ISDIR(ip->i_d.di_mode)) {
377 mp = ip->i_mount;
378 memset(&dargs, 0, sizeof(dargs)); 414 memset(&dargs, 0, sizeof(dargs));
379 dargs.dp = ip; 415 dargs.dp = ip;
380 dargs.firstblock = firstblock; 416 dargs.firstblock = firstblock;
381 dargs.flist = flist; 417 dargs.flist = flist;
382 dargs.total = mp->m_dirblkfsbs; 418 dargs.total = ip->i_mount->m_dirblkfsbs;
383 dargs.whichfork = XFS_DATA_FORK; 419 dargs.whichfork = XFS_DATA_FORK;
384 dargs.trans = tp; 420 dargs.trans = tp;
385 error = xfs_dir2_sf_to_block(&dargs); 421 return xfs_dir2_sf_to_block(&dargs);
386 } else 422 }
387 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 423
388 XFS_DATA_FORK); 424 if (S_ISLNK(ip->i_d.di_mode))
389 return error; 425 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
426 flags, XFS_DATA_FORK,
427 xfs_symlink_local_to_remote);
428
429 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
430 XFS_DATA_FORK,
431 xfs_bmap_local_to_extents_init_fn);
390} 432}
391 433
392/* 434/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
3099 args.fsbno = *firstblock; 3141 args.fsbno = *firstblock;
3100 } 3142 }
3101 args.minlen = args.maxlen = args.prod = 1; 3143 args.minlen = args.maxlen = args.prod = 1;
3102 args.total = args.minleft = args.alignment = args.mod = args.isfl =
3103 args.minalignslop = 0;
3104 args.wasdel = wasdel; 3144 args.wasdel = wasdel;
3105 *logflagsp = 0; 3145 *logflagsp = 0;
3106 if ((error = xfs_alloc_vextent(&args))) { 3146 if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
3221 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 3261 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
3222 xfs_extlen_t total, /* total blocks needed by transaction */ 3262 xfs_extlen_t total, /* total blocks needed by transaction */
3223 int *logflagsp, /* inode logging flags */ 3263 int *logflagsp, /* inode logging flags */
3224 int whichfork) /* data or attr fork */ 3264 int whichfork,
3265 void (*init_fn)(struct xfs_buf *bp,
3266 struct xfs_inode *ip,
3267 struct xfs_ifork *ifp))
3225{ 3268{
3226 int error; /* error return value */ 3269 int error; /* error return value */
3227 int flags; /* logging flags returned */ 3270 int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
3241 xfs_buf_t *bp; /* buffer for extent block */ 3284 xfs_buf_t *bp; /* buffer for extent block */
3242 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3285 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3243 3286
3287 ASSERT((ifp->if_flags &
3288 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3244 memset(&args, 0, sizeof(args)); 3289 memset(&args, 0, sizeof(args));
3245 args.tp = tp; 3290 args.tp = tp;
3246 args.mp = ip->i_mount; 3291 args.mp = ip->i_mount;
3247 args.firstblock = *firstblock; 3292 args.firstblock = *firstblock;
3248 ASSERT((ifp->if_flags &
3249 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3250 /* 3293 /*
3251 * Allocate a block. We know we need only one, since the 3294 * Allocate a block. We know we need only one, since the
3252 * file currently fits in an inode. 3295 * file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
3259 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3260 } 3303 }
3261 args.total = total; 3304 args.total = total;
3262 args.mod = args.minleft = args.alignment = args.wasdel =
3263 args.isfl = args.minalignslop = 0;
3264 args.minlen = args.maxlen = args.prod = 1; 3305 args.minlen = args.maxlen = args.prod = 1;
3265 if ((error = xfs_alloc_vextent(&args))) 3306 error = xfs_alloc_vextent(&args);
3307 if (error)
3266 goto done; 3308 goto done;
3267 /* 3309
3268 * Can't fail, the space was reserved. 3310 /* Can't fail, the space was reserved. */
3269 */
3270 ASSERT(args.fsbno != NULLFSBLOCK); 3311 ASSERT(args.fsbno != NULLFSBLOCK);
3271 ASSERT(args.len == 1); 3312 ASSERT(args.len == 1);
3272 *firstblock = args.fsbno; 3313 *firstblock = args.fsbno;
3273 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 3314 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3274 bp->b_ops = &xfs_bmbt_buf_ops; 3315
3275 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 3316 /* initialise the block and copy the data */
3317 init_fn(bp, ip, ifp);
3318
3319 /* account for the change in fork size and log everything */
3276 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 3320 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3277 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3321 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3278 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3322 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4680,9 +4724,6 @@ __xfs_bmapi_allocate(
4680 return error; 4724 return error;
4681 } 4725 }
4682 4726
4683 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4684 bma->stack_switch = 1;
4685
4686 error = xfs_bmap_alloc(bma); 4727 error = xfs_bmap_alloc(bma);
4687 if (error) 4728 if (error)
4688 return error; 4729 return error;
@@ -4922,8 +4963,32 @@ xfs_bmapi_write(
4922 XFS_STATS_INC(xs_blk_mapw); 4963 XFS_STATS_INC(xs_blk_mapw);
4923 4964
4924 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 4965 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4966 /*
4967 * XXX (dgc): This assumes we are only called for inodes that
4968 * contain content neutral data in local format. Anything that
4969 * contains caller-specific data in local format that needs
4970 * transformation to move to a block format needs to do the
4971 * conversion to extent format itself.
4972 *
4973 * Directory data forks and attribute forks handle this
4974 * themselves, but with the addition of metadata verifiers every
4975 * data fork in local format now contains caller specific data
4976 * and as such conversion through this function is likely to be
4977 * broken.
4978 *
4979 * The only likely user of this branch is for remote symlinks,
4980 * but we cannot overwrite the data fork contents of the symlink
4981 * (EEXIST occurs higher up the stack) and so it will never go
4982 * from local format to extent format here. Hence I don't think
4983 * this branch is ever executed intentionally and we should
4984 * consider removing it and asserting that xfs_bmapi_write()
4985 * cannot be called directly on local format forks. i.e. callers
4986 * are completely responsible for local to extent format
4987 * conversion, not xfs_bmapi_write().
4988 */
4925 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, 4989 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4926 &bma.logflags, whichfork); 4990 &bma.logflags, whichfork,
4991 xfs_bmap_local_to_extents_init_fn);
4927 if (error) 4992 if (error)
4928 goto error0; 4993 goto error0;
4929 } 4994 }
@@ -4956,6 +5021,9 @@ xfs_bmapi_write(
4956 bma.flist = flist; 5021 bma.flist = flist;
4957 bma.firstblock = firstblock; 5022 bma.firstblock = firstblock;
4958 5023
5024 if (flags & XFS_BMAPI_STACK_SWITCH)
5025 bma.stack_switch = 1;
5026
4959 while (bno < end && n < *nmap) { 5027 while (bno < end && n < *nmap) {
4960 inhole = eof || bma.got.br_startoff > bno; 5028 inhole = eof || bma.got.br_startoff > bno;
4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 5029 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
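
xfs_bmap_local_to_extents() now takes an init_fn callout, so each data-fork content type decides how the freshly allocated block is initialised and which verifier ops it gets. A minimal sketch of what such a callout looks like and how a caller passes it in, mirroring the two real ones added above; the function name here is hypothetical.

/* Hypothetical init_fn callout in the style of those added above. */
STATIC void
xfs_example_local_to_extents_init(
        struct xfs_buf          *bp,
        struct xfs_inode        *ip,
        struct xfs_ifork        *ifp)
{
        bp->b_ops = NULL;       /* no verifier for this hypothetical content type */
        memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
}

/* A caller hands it to the conversion routine: */
error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, &logflags,
                                  XFS_DATA_FORK,
                                  xfs_example_local_to_extents_init);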
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 26673a0b20e7..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -175,7 +175,7 @@ xfs_buf_get_maps(
175 bp->b_map_count = map_count; 175 bp->b_map_count = map_count;
176 176
177 if (map_count == 1) { 177 if (map_count == 1) {
178 bp->b_maps = &bp->b_map; 178 bp->b_maps = &bp->__b_map;
179 return 0; 179 return 0;
180 } 180 }
181 181
@@ -193,7 +193,7 @@ static void
193xfs_buf_free_maps( 193xfs_buf_free_maps(
194 struct xfs_buf *bp) 194 struct xfs_buf *bp)
195{ 195{
196 if (bp->b_maps != &bp->b_map) { 196 if (bp->b_maps != &bp->__b_map) {
197 kmem_free(bp->b_maps); 197 kmem_free(bp->b_maps);
198 bp->b_maps = NULL; 198 bp->b_maps = NULL;
199 } 199 }
@@ -377,8 +377,8 @@ xfs_buf_allocate_memory(
377 } 377 }
378 378
379use_alloc_page: 379use_alloc_page:
380 start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; 380 start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
381 end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) 381 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
382 >> PAGE_SHIFT; 382 >> PAGE_SHIFT;
383 page_count = end - start; 383 page_count = end - start;
384 error = _xfs_buf_get_pages(bp, page_count, flags); 384 error = _xfs_buf_get_pages(bp, page_count, flags);
@@ -487,6 +487,7 @@ _xfs_buf_find(
487 struct rb_node *parent; 487 struct rb_node *parent;
488 xfs_buf_t *bp; 488 xfs_buf_t *bp;
489 xfs_daddr_t blkno = map[0].bm_bn; 489 xfs_daddr_t blkno = map[0].bm_bn;
490 xfs_daddr_t eofs;
490 int numblks = 0; 491 int numblks = 0;
491 int i; 492 int i;
492 493
@@ -498,6 +499,23 @@ _xfs_buf_find(
498 ASSERT(!(numbytes < (1 << btp->bt_sshift))); 499 ASSERT(!(numbytes < (1 << btp->bt_sshift)));
499 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); 500 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
500 501
502 /*
503 * Corrupted block numbers can get through to here, unfortunately, so we
504 * have to check that the buffer falls within the filesystem bounds.
505 */
506 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
507 if (blkno >= eofs) {
508 /*
509 * XXX (dgc): we should really be returning EFSCORRUPTED here,
510 * but none of the higher level infrastructure supports
511 * returning a specific error on buffer lookup failures.
512 */
513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs);
516 return NULL;
517 }
518
501 /* get tree root */ 519 /* get tree root */
502 pag = xfs_perag_get(btp->bt_mount, 520 pag = xfs_perag_get(btp->bt_mount,
503 xfs_daddr_to_agno(btp->bt_mount, blkno)); 521 xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -640,7 +658,7 @@ _xfs_buf_read(
640 xfs_buf_flags_t flags) 658 xfs_buf_flags_t flags)
641{ 659{
642 ASSERT(!(flags & XBF_WRITE)); 660 ASSERT(!(flags & XBF_WRITE));
643 ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); 661 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
644 662
645 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); 663 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
646 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); 664 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -933,8 +951,6 @@ xfs_buf_trylock(
933 locked = down_trylock(&bp->b_sema) == 0; 951 locked = down_trylock(&bp->b_sema) == 0;
934 if (locked) 952 if (locked)
935 XB_SET_OWNER(bp); 953 XB_SET_OWNER(bp);
936 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
937 xfs_log_force(bp->b_target->bt_mount, 0);
938 954
939 trace_xfs_buf_trylock(bp, _RET_IP_); 955 trace_xfs_buf_trylock(bp, _RET_IP_);
940 return locked; 956 return locked;
@@ -1487,6 +1503,8 @@ restart:
1487 while (!list_empty(&btp->bt_lru)) { 1503 while (!list_empty(&btp->bt_lru)) {
1488 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1504 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1489 if (atomic_read(&bp->b_hold) > 1) { 1505 if (atomic_read(&bp->b_hold) > 1) {
1506 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1507 list_move_tail(&bp->b_lru, &btp->bt_lru);
1490 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1491 delay(100); 1509 delay(100);
1492 goto restart; 1510 goto restart;
@@ -1709,7 +1727,7 @@ xfs_buf_cmp(
1709 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); 1727 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1710 xfs_daddr_t diff; 1728 xfs_daddr_t diff;
1711 1729
1712 diff = ap->b_map.bm_bn - bp->b_map.bm_bn; 1730 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
1713 if (diff < 0) 1731 if (diff < 0)
1714 return -1; 1732 return -1;
1715 if (diff > 0) 1733 if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 23f5642480bb..433a12ed7b17 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -151,7 +151,7 @@ typedef struct xfs_buf {
151 struct page **b_pages; /* array of page pointers */ 151 struct page **b_pages; /* array of page pointers */
152 struct page *b_page_array[XB_PAGES]; /* inline pages */ 152 struct page *b_page_array[XB_PAGES]; /* inline pages */
153 struct xfs_buf_map *b_maps; /* compound buffer map */ 153 struct xfs_buf_map *b_maps; /* compound buffer map */
154 struct xfs_buf_map b_map; /* inline compound buffer map */ 154 struct xfs_buf_map __b_map; /* inline compound buffer map */
155 int b_map_count; 155 int b_map_count;
156 int b_io_length; /* IO size in BBs */ 156 int b_io_length; /* IO size in BBs */
157 atomic_t b_pin_count; /* pin count */ 157 atomic_t b_pin_count; /* pin count */
@@ -330,8 +330,8 @@ void xfs_buf_stale(struct xfs_buf *bp);
330 * In future, uncached buffers will pass the block number directly to the io 330 * In future, uncached buffers will pass the block number directly to the io
331 * request function and hence these macros will go away at that point. 331 * request function and hence these macros will go away at that point.
332 */ 332 */
333#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn) 333#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn)
334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno)) 334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
335 335
336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
337{ 337{
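
The b_map -> __b_map rename (and the matching __bli_format change further down) follows one pattern: code always goes through the b_maps pointer, and the underscored field is only the inline backing store for the common single-segment case, with a real allocation made only for multi-segment buffers. A generic sketch of that pattern; all names below are illustrative, not XFS API.

/* Illustrative sketch of the "inline element for one, allocation for many"
 * pattern behind b_maps/__b_map and bli_formats/__bli_format. */
#include <linux/slab.h>
#include <linux/types.h>

struct seg {
        u64     start;
        int     len;
};

struct holder {
        struct seg      *segs;          /* code only ever uses segs[i] */
        struct seg      __seg;          /* inline backing store, 1-segment case */
        int             seg_count;
};

static int holder_get_segs(struct holder *h, int count)
{
        h->seg_count = count;
        if (count == 1) {
                h->segs = &h->__seg;    /* no allocation needed */
                return 0;
        }
        h->segs = kcalloc(count, sizeof(*h->segs), GFP_KERNEL);
        return h->segs ? 0 : -ENOMEM;
}

static void holder_put_segs(struct holder *h)
{
        if (h->segs != &h->__seg)       /* only free what was allocated */
                kfree(h->segs);
        h->segs = NULL;
}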
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index becf4a97efc6..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37 return container_of(lip, struct xfs_buf_log_item, bli_item); 37 return container_of(lip, struct xfs_buf_log_item, bli_item);
38} 38}
39 39
40
41#ifdef XFS_TRANS_DEBUG
42/*
43 * This function uses an alternate strategy for tracking the bytes
44 * that the user requests to be logged. This can then be used
45 * in conjunction with the bli_orig array in the buf log item to
46 * catch bugs in our callers' code.
47 *
48 * We also double check the bits set in xfs_buf_item_log using a
49 * simple algorithm to check that every byte is accounted for.
50 */
51STATIC void
52xfs_buf_item_log_debug(
53 xfs_buf_log_item_t *bip,
54 uint first,
55 uint last)
56{
57 uint x;
58 uint byte;
59 uint nbytes;
60 uint chunk_num;
61 uint word_num;
62 uint bit_num;
63 uint bit_set;
64 uint *wordp;
65
66 ASSERT(bip->bli_logged != NULL);
67 byte = first;
68 nbytes = last - first + 1;
69 bfset(bip->bli_logged, first, nbytes);
70 for (x = 0; x < nbytes; x++) {
71 chunk_num = byte >> XFS_BLF_SHIFT;
72 word_num = chunk_num >> BIT_TO_WORD_SHIFT;
73 bit_num = chunk_num & (NBWORD - 1);
74 wordp = &(bip->bli_format.blf_data_map[word_num]);
75 bit_set = *wordp & (1 << bit_num);
76 ASSERT(bit_set);
77 byte++;
78 }
79}
80
81/*
82 * This function is called when we flush something into a buffer without
83 * logging it. This happens for things like inodes which are logged
84 * separately from the buffer.
85 */
86void
87xfs_buf_item_flush_log_debug(
88 xfs_buf_t *bp,
89 uint first,
90 uint last)
91{
92 xfs_buf_log_item_t *bip = bp->b_fspriv;
93 uint nbytes;
94
95 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
96 return;
97
98 ASSERT(bip->bli_logged != NULL);
99 nbytes = last - first + 1;
100 bfset(bip->bli_logged, first, nbytes);
101}
102
103/*
104 * This function is called to verify that our callers have logged
105 * all the bytes that they changed.
106 *
107 * It does this by comparing the original copy of the buffer stored in
108 * the buf log item's bli_orig array to the current copy of the buffer
109 * and ensuring that all bytes which mismatch are set in the bli_logged
110 * array of the buf log item.
111 */
112STATIC void
113xfs_buf_item_log_check(
114 xfs_buf_log_item_t *bip)
115{
116 char *orig;
117 char *buffer;
118 int x;
119 xfs_buf_t *bp;
120
121 ASSERT(bip->bli_orig != NULL);
122 ASSERT(bip->bli_logged != NULL);
123
124 bp = bip->bli_buf;
125 ASSERT(bp->b_length > 0);
126 ASSERT(bp->b_addr != NULL);
127 orig = bip->bli_orig;
128 buffer = bp->b_addr;
129 for (x = 0; x < BBTOB(bp->b_length); x++) {
130 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
131 xfs_emerg(bp->b_mount,
132 "%s: bip %x buffer %x orig %x index %d",
133 __func__, bip, bp, orig, x);
134 ASSERT(0);
135 }
136 }
137}
138#else
139#define xfs_buf_item_log_debug(x,y,z)
140#define xfs_buf_item_log_check(x)
141#endif
142
143STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); 40STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
144 41
145/* 42/*
@@ -237,7 +134,7 @@ xfs_buf_item_size(
237 * cancel flag in it. 134 * cancel flag in it.
238 */ 135 */
239 trace_xfs_buf_item_size_stale(bip); 136 trace_xfs_buf_item_size_stale(bip);
240 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 137 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
241 return bip->bli_format_count; 138 return bip->bli_format_count;
242 } 139 }
243 140
@@ -278,7 +175,7 @@ xfs_buf_item_format_segment(
278 uint buffer_offset; 175 uint buffer_offset;
279 176
280 /* copy the flags across from the base format item */ 177 /* copy the flags across from the base format item */
281 blfp->blf_flags = bip->bli_format.blf_flags; 178 blfp->blf_flags = bip->__bli_format.blf_flags;
282 179
283 /* 180 /*
284 * Base size is the actual size of the ondisk structure - it reflects 181 * Base size is the actual size of the ondisk structure - it reflects
@@ -287,6 +184,17 @@ xfs_buf_item_format_segment(
287 */ 184 */
288 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + 185 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
289 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); 186 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
187
188 nvecs = 0;
189 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
190 if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
191 /*
 192	 * If the map is not dirty in the transaction, mark
193 * the size as zero and do not advance the vector pointer.
194 */
195 goto out;
196 }
197
290 vecp->i_addr = blfp; 198 vecp->i_addr = blfp;
291 vecp->i_len = base_size; 199 vecp->i_len = base_size;
292 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 200 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
@@ -301,15 +209,13 @@ xfs_buf_item_format_segment(
301 */ 209 */
302 trace_xfs_buf_item_format_stale(bip); 210 trace_xfs_buf_item_format_stale(bip);
303 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); 211 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
304 blfp->blf_size = nvecs; 212 goto out;
305 return vecp;
306 } 213 }
307 214
308 /* 215 /*
309 * Fill in an iovec for each set of contiguous chunks. 216 * Fill in an iovec for each set of contiguous chunks.
310 */ 217 */
311 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 218
312 ASSERT(first_bit != -1);
313 last_bit = first_bit; 219 last_bit = first_bit;
314 nbits = 1; 220 nbits = 1;
315 for (;;) { 221 for (;;) {
@@ -371,7 +277,8 @@ xfs_buf_item_format_segment(
371 nbits++; 277 nbits++;
372 } 278 }
373 } 279 }
374 bip->bli_format.blf_size = nvecs; 280out:
281 blfp->blf_size = nvecs;
375 return vecp; 282 return vecp;
376} 283}
377 284
@@ -405,7 +312,7 @@ xfs_buf_item_format(
405 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 312 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
406 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 313 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
407 xfs_log_item_in_current_chkpt(lip))) 314 xfs_log_item_in_current_chkpt(lip)))
408 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 315 bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
409 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 316 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
410 } 317 }
411 318
@@ -419,7 +326,6 @@ xfs_buf_item_format(
419 * Check to make sure everything is consistent. 326 * Check to make sure everything is consistent.
420 */ 327 */
421 trace_xfs_buf_item_format(bip); 328 trace_xfs_buf_item_format(bip);
422 xfs_buf_item_log_check(bip);
423} 329}
424 330
425/* 331/*
@@ -485,7 +391,7 @@ xfs_buf_item_unpin(
485 ASSERT(bip->bli_flags & XFS_BLI_STALE); 391 ASSERT(bip->bli_flags & XFS_BLI_STALE);
486 ASSERT(xfs_buf_islocked(bp)); 392 ASSERT(xfs_buf_islocked(bp));
487 ASSERT(XFS_BUF_ISSTALE(bp)); 393 ASSERT(XFS_BUF_ISSTALE(bp));
488 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 394 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
489 395
490 trace_xfs_buf_item_unpin_stale(bip); 396 trace_xfs_buf_item_unpin_stale(bip);
491 397
@@ -563,8 +469,18 @@ xfs_buf_item_push(
563 469
564 if (xfs_buf_ispinned(bp)) 470 if (xfs_buf_ispinned(bp))
565 return XFS_ITEM_PINNED; 471 return XFS_ITEM_PINNED;
566 if (!xfs_buf_trylock(bp)) 472 if (!xfs_buf_trylock(bp)) {
473 /*
474 * If we have just raced with a buffer being pinned and it has
475 * been marked stale, we could end up stalling until someone else
476 * issues a log force to unpin the stale buffer. Check for the
477 * race condition here so xfsaild recognizes the buffer is pinned
478 * and queues a log force to move it along.
479 */
480 if (xfs_buf_ispinned(bp))
481 return XFS_ITEM_PINNED;
567 return XFS_ITEM_LOCKED; 482 return XFS_ITEM_LOCKED;
483 }
568 484
569 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 485 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
570 486
@@ -601,7 +517,7 @@ xfs_buf_item_unlock(
601{ 517{
602 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 518 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
603 struct xfs_buf *bp = bip->bli_buf; 519 struct xfs_buf *bp = bip->bli_buf;
604 int aborted; 520 int aborted, clean, i;
605 uint hold; 521 uint hold;
606 522
607 /* Clear the buffer's association with this transaction. */ 523 /* Clear the buffer's association with this transaction. */
@@ -631,7 +547,7 @@ xfs_buf_item_unlock(
631 */ 547 */
632 if (bip->bli_flags & XFS_BLI_STALE) { 548 if (bip->bli_flags & XFS_BLI_STALE) {
633 trace_xfs_buf_item_unlock_stale(bip); 549 trace_xfs_buf_item_unlock_stale(bip);
634 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 550 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
635 if (!aborted) { 551 if (!aborted) {
636 atomic_dec(&bip->bli_refcount); 552 atomic_dec(&bip->bli_refcount);
637 return; 553 return;
@@ -642,12 +558,27 @@ xfs_buf_item_unlock(
642 558
643 /* 559 /*
644 * If the buf item isn't tracking any data, free it, otherwise drop the 560 * If the buf item isn't tracking any data, free it, otherwise drop the
645 * reference we hold to it. 561 * reference we hold to it. If we are aborting the transaction, this may
562 * be the only reference to the buf item, so we free it anyway
563 * regardless of whether it is dirty or not. A dirty abort implies a
564 * shutdown, anyway.
646 */ 565 */
647 if (xfs_bitmap_empty(bip->bli_format.blf_data_map, 566 clean = 1;
648 bip->bli_format.blf_map_size)) 567 for (i = 0; i < bip->bli_format_count; i++) {
568 if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
569 bip->bli_formats[i].blf_map_size)) {
570 clean = 0;
571 break;
572 }
573 }
574 if (clean)
649 xfs_buf_item_relse(bp); 575 xfs_buf_item_relse(bp);
650 else 576 else if (aborted) {
577 if (atomic_dec_and_test(&bip->bli_refcount)) {
578 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
579 xfs_buf_item_relse(bp);
580 }
581 } else
651 atomic_dec(&bip->bli_refcount); 582 atomic_dec(&bip->bli_refcount);
652 583
653 if (!hold) 584 if (!hold)
@@ -716,7 +647,7 @@ xfs_buf_item_get_format(
716 bip->bli_format_count = count; 647 bip->bli_format_count = count;
717 648
718 if (count == 1) { 649 if (count == 1) {
719 bip->bli_formats = &bip->bli_format; 650 bip->bli_formats = &bip->__bli_format;
720 return 0; 651 return 0;
721 } 652 }
722 653
@@ -731,7 +662,7 @@ STATIC void
731xfs_buf_item_free_format( 662xfs_buf_item_free_format(
732 struct xfs_buf_log_item *bip) 663 struct xfs_buf_log_item *bip)
733{ 664{
734 if (bip->bli_formats != &bip->bli_format) { 665 if (bip->bli_formats != &bip->__bli_format) {
735 kmem_free(bip->bli_formats); 666 kmem_free(bip->bli_formats);
736 bip->bli_formats = NULL; 667 bip->bli_formats = NULL;
737 } 668 }
@@ -898,8 +829,6 @@ xfs_buf_item_log_segment(
898 mask = (1 << end_bit) - 1; 829 mask = (1 << end_bit) - 1;
899 *wordp |= mask; 830 *wordp |= mask;
900 } 831 }
901
902 xfs_buf_item_log_debug(bip, first, last);
903} 832}
904 833
905/* 834/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 6850f49f4af3..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,13 +98,9 @@ typedef struct xfs_buf_log_item {
98 unsigned int bli_flags; /* misc flags */ 98 unsigned int bli_flags; /* misc flags */
99 unsigned int bli_recur; /* lock recursion count */ 99 unsigned int bli_recur; /* lock recursion count */
100 atomic_t bli_refcount; /* cnt of tp refs */ 100 atomic_t bli_refcount; /* cnt of tp refs */
101#ifdef XFS_TRANS_DEBUG
102 char *bli_orig; /* original buffer copy */
103 char *bli_logged; /* bytes logged (bitmap) */
104#endif
105 int bli_format_count; /* count of headers */ 101 int bli_format_count; /* count of headers */
106 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 102 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
107 struct xfs_buf_log_format bli_format; /* embedded in-log header */ 103 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
108} xfs_buf_log_item_t; 104} xfs_buf_log_item_t;
109 105
110void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 106void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
117void xfs_buf_iodone_callbacks(struct xfs_buf *); 113void xfs_buf_iodone_callbacks(struct xfs_buf *);
118void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
119 115
120#ifdef XFS_TRANS_DEBUG
121void
122xfs_buf_item_flush_log_debug(
123 struct xfs_buf *bp,
124 uint first,
125 uint last);
126#else
127#define xfs_buf_item_flush_log_debug(bp, first, last)
128#endif
129
130#endif /* __KERNEL__ */ 116#endif /* __KERNEL__ */
131 117
132#endif /* __XFS_BUF_ITEM_H__ */ 118#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..f852b082a084 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -78,14 +78,14 @@ xfs_swapext(
78 goto out_put_tmp_file; 78 goto out_put_tmp_file;
79 } 79 }
80 80
81 if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || 81 if (IS_SWAPFILE(file_inode(f.file)) ||
82 IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { 82 IS_SWAPFILE(file_inode(tmp.file))) {
83 error = XFS_ERROR(EINVAL); 83 error = XFS_ERROR(EINVAL);
84 goto out_put_tmp_file; 84 goto out_put_tmp_file;
85 } 85 }
86 86
87 ip = XFS_I(f.file->f_path.dentry->d_inode); 87 ip = XFS_I(file_inode(f.file));
88 tip = XFS_I(tmp.file->f_path.dentry->d_inode); 88 tip = XFS_I(file_inode(tmp.file));
89 89
90 if (ip->i_mount != tip->i_mount) { 90 if (ip->i_mount != tip->i_mount) {
91 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
@@ -246,10 +246,10 @@ xfs_swap_extents(
246 goto out_unlock; 246 goto out_unlock;
247 } 247 }
248 248
249 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 249 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
250 if (error) 250 if (error)
251 goto out_unlock; 251 goto out_unlock;
252 truncate_pagecache_range(VFS_I(ip), 0, -1); 252 truncate_pagecache_range(VFS_I(tip), 0, -1);
253 253
254 /* Verify O_DIRECT for ftmp */ 254 /* Verify O_DIRECT for ftmp */
255 if (VN_CACHED(VFS_I(tip)) != 0) { 255 if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 7536faaa61e7..12afe07a91d7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -355,10 +355,12 @@ xfs_dir2_block_addname(
355 /* 355 /*
356 * If need to compact the leaf entries, do it now. 356 * If need to compact the leaf entries, do it now.
357 */ 357 */
358 if (compact) 358 if (compact) {
359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, 359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
360 &lfloghigh, &lfloglow); 360 &lfloghigh, &lfloglow);
361 else if (btp->stale) { 361 /* recalculate blp post-compaction */
362 blp = xfs_dir2_block_leaf_p(btp);
363 } else if (btp->stale) {
362 /* 364 /*
363 * Set leaf logging boundaries to impossible state. 365 * Set leaf logging boundaries to impossible state.
364 * For the no-stale case they're set explicitly. 366 * For the no-stale case they're set explicitly.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
612 if (flags & XFS_QMOPT_DQALLOC) { 612 if (flags & XFS_QMOPT_DQALLOC) {
613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
615 XFS_WRITE_LOG_RES(mp) + 615 XFS_QM_DQALLOC_LOG_RES(mp), 0,
616 /* 616 XFS_TRANS_PERM_LOG_RES,
617 * Round the chunklen up to the next multiple 617 XFS_WRITE_LOG_COUNT);
618 * of 128 (buf log item chunk size)).
619 */
620 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
621 0,
622 XFS_TRANS_PERM_LOG_RES,
623 XFS_WRITE_LOG_COUNT);
624 if (error) 618 if (error)
625 goto error1; 619 goto error1;
626 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index a83611849cee..c585bc646395 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -48,7 +48,7 @@ static int xfs_fileid_length(int fileid_type)
48 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: 48 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
49 return 6; 49 return 6;
50 } 50 }
51 return 255; /* invalid */ 51 return FILEID_INVALID;
52} 52}
53 53
54STATIC int 54STATIC int
@@ -90,7 +90,7 @@ xfs_fs_encode_fh(
90 len = xfs_fileid_length(fileid_type); 90 len = xfs_fileid_length(fileid_type);
91 if (*max_len < len) { 91 if (*max_len < len) {
92 *max_len = len; 92 *max_len = len;
93 return 255; 93 return FILEID_INVALID;
94 } 94 }
95 *max_len = len; 95 *max_len = len;
96 96
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 67284edb84d7..f03bf1a456fb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -811,7 +811,7 @@ xfs_file_fallocate(
811 loff_t offset, 811 loff_t offset,
812 loff_t len) 812 loff_t len)
813{ 813{
814 struct inode *inode = file->f_path.dentry->d_inode; 814 struct inode *inode = file_inode(file);
815 long error; 815 long error;
816 loff_t new_size = 0; 816 loff_t new_size = 0;
817 xfs_flock64_t bf; 817 xfs_flock64_t bf;
@@ -912,7 +912,7 @@ xfs_file_readdir(
912 void *dirent, 912 void *dirent,
913 filldir_t filldir) 913 filldir_t filldir)
914{ 914{
915 struct inode *inode = filp->f_path.dentry->d_inode; 915 struct inode *inode = file_inode(filp);
916 xfs_inode_t *ip = XFS_I(inode); 916 xfs_inode_t *ip = XFS_I(inode);
917 int error; 917 int error;
918 size_t bufsize; 918 size_t bufsize;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
709 int error; 709 int error;
710 710
711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
712 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 712 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
713 XFS_DEFAULT_LOG_COUNT); 713 XFS_DEFAULT_LOG_COUNT);
714 if (error) { 714 if (error) {
715 xfs_trans_cancel(tp, 0); 715 xfs_trans_cancel(tp, 0);
716 return error; 716 return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 279 (args.agbno < be32_to_cpu(agi->agi_length)))) {
280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
281 args.type = XFS_ALLOCTYPE_THIS_BNO; 281 args.type = XFS_ALLOCTYPE_THIS_BNO;
282 args.mod = args.total = args.wasdel = args.isfl =
283 args.userdata = args.minalignslop = 0;
284 args.prod = 1; 282 args.prod = 1;
285 283
286 /* 284 /*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
333 * Allocate a fixed-size extent of inodes. 331 * Allocate a fixed-size extent of inodes.
334 */ 332 */
335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 333 args.type = XFS_ALLOCTYPE_NEAR_BNO;
336 args.mod = args.total = args.wasdel = args.isfl =
337 args.userdata = args.minalignslop = 0;
338 args.prod = 1; 334 args.prod = 1;
339 /* 335 /*
340 * Allow space for the inode btree to split. 336 * Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
2379 char *cp; 2379 char *cp;
2380 xfs_ifork_t *ifp; 2380 xfs_ifork_t *ifp;
2381 xfs_mount_t *mp; 2381 xfs_mount_t *mp;
2382#ifdef XFS_TRANS_DEBUG
2383 int first;
2384#endif
2385 static const short brootflag[2] = 2382 static const short brootflag[2] =
2386 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2383 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2387 static const short dataflag[2] = 2384 static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
2724 xfs_inode_log_item_t *iip; 2721 xfs_inode_log_item_t *iip;
2725 xfs_dinode_t *dip; 2722 xfs_dinode_t *dip;
2726 xfs_mount_t *mp; 2723 xfs_mount_t *mp;
2727#ifdef XFS_TRANS_DEBUG
2728 int first;
2729#endif
2730 2724
2731 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2732 ASSERT(xfs_isiflocked(ip)); 2726 ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
419static inline void xfs_ifunlock(struct xfs_inode *ip) 419static inline void xfs_ifunlock(struct xfs_inode *ip)
420{ 420{
421 xfs_iflags_clear(ip, XFS_IFLOCK); 421 xfs_iflags_clear(ip, XFS_IFLOCK);
422 smp_mb();
422 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 423 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
423} 424}
424 425
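The smp_mb() added to xfs_ifunlock() above enforces the ordering that wake_up_bit() requires: the cleared flag bit must be globally visible before the waker inspects the wait queue, otherwise a waiter that has just queued itself and re-checked the still-visible flag can sleep forever. A minimal userspace sketch of that ordering, using C11 atomics rather than the kernel primitives (the names and the two-variable shape are illustrative only, not the XFS waiter code):

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool flag_set;		/* stands in for __XFS_IFLOCK_BIT */
	static atomic_bool waiter_queued;	/* stands in for the wait queue */

	static void unlock_side(void)		/* the xfs_ifunlock() role */
	{
		atomic_store_explicit(&flag_set, false, memory_order_relaxed);
		/* full barrier, like smp_mb() above: publish the cleared flag
		 * before looking for waiters */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&waiter_queued, memory_order_relaxed)) {
			/* wake the waiter */
		}
	}

	static void wait_side(void)		/* the sleeping xfs_iflock() role */
	{
		atomic_store_explicit(&waiter_queued, true, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&flag_set, memory_order_relaxed)) {
			/* really go to sleep; a wakeup is now guaranteed */
		}
	}

Without the fence on the unlock side, the store clearing the flag and the load of waiter_queued may be reordered, so both threads can each miss the other's update.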
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
269 } else { 269 } else {
270 ASSERT(!(iip->ili_fields & 270 ASSERT(!(iip->ili_fields &
271 XFS_ILOG_DBROOT)); 271 XFS_ILOG_DBROOT));
272#ifdef XFS_TRANS_DEBUG
273 if (iip->ili_root_size > 0) {
274 ASSERT(iip->ili_root_size ==
275 ip->i_df.if_broot_bytes);
276 ASSERT(memcmp(iip->ili_orig_root,
277 ip->i_df.if_broot,
278 iip->ili_root_size) == 0);
279 } else {
280 ASSERT(ip->i_df.if_broot_bytes == 0);
281 }
282#endif
283 iip->ili_fields &= ~XFS_ILOG_DBROOT; 272 iip->ili_fields &= ~XFS_ILOG_DBROOT;
284 } 273 }
285 break; 274 break;
@@ -678,11 +667,6 @@ void
678xfs_inode_item_destroy( 667xfs_inode_item_destroy(
679 xfs_inode_t *ip) 668 xfs_inode_t *ip)
680{ 669{
681#ifdef XFS_TRANS_DEBUG
682 if (ip->i_itemp->ili_root_size != 0) {
683 kmem_free(ip->i_itemp->ili_orig_root);
684 }
685#endif
686 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 670 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
687} 671}
688 672
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
148 data exts */ 148 data exts */
149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
150 attr exts */ 150 attr exts */
151#ifdef XFS_TRANS_DEBUG
152 int ili_root_size;
153 char *ili_orig_root;
154#endif
155 xfs_inode_log_format_t ili_format; /* logged structure */ 151 xfs_inode_log_format_t ili_format; /* logged structure */
156} xfs_inode_log_item_t; 152} xfs_inode_log_item_t;
157 153
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c1c3ef88a260..d681e34c2950 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -80,7 +80,7 @@ xfs_find_handle(
80 f = fdget(hreq->fd); 80 f = fdget(hreq->fd);
81 if (!f.file) 81 if (!f.file)
82 return -EBADF; 82 return -EBADF;
83 inode = f.file->f_path.dentry->d_inode; 83 inode = file_inode(f.file);
84 } else { 84 } else {
85 error = user_lpath((const char __user *)hreq->path, &path); 85 error = user_lpath((const char __user *)hreq->path, &path);
86 if (error) 86 if (error)
@@ -168,7 +168,7 @@ xfs_handle_to_dentry(
168 /* 168 /*
169 * Only allow handle opens under a directory. 169 * Only allow handle opens under a directory.
170 */ 170 */
171 if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) 171 if (!S_ISDIR(file_inode(parfilp)->i_mode))
172 return ERR_PTR(-ENOTDIR); 172 return ERR_PTR(-ENOTDIR);
173 173
174 if (hlen != sizeof(xfs_handle_t)) 174 if (hlen != sizeof(xfs_handle_t))
@@ -1334,7 +1334,7 @@ xfs_file_ioctl(
1334 unsigned int cmd, 1334 unsigned int cmd,
1335 unsigned long p) 1335 unsigned long p)
1336{ 1336{
1337 struct inode *inode = filp->f_path.dentry->d_inode; 1337 struct inode *inode = file_inode(filp);
1338 struct xfs_inode *ip = XFS_I(inode); 1338 struct xfs_inode *ip = XFS_I(inode);
1339 struct xfs_mount *mp = ip->i_mount; 1339 struct xfs_mount *mp = ip->i_mount;
1340 void __user *arg = (void __user *)p; 1340 void __user *arg = (void __user *)p;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1244274a5674..63b8fc432151 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -530,7 +530,7 @@ xfs_file_compat_ioctl(
530 unsigned cmd, 530 unsigned cmd,
531 unsigned long p) 531 unsigned long p)
532{ 532{
533 struct inode *inode = filp->f_path.dentry->d_inode; 533 struct inode *inode = file_inode(filp);
534 struct xfs_inode *ip = XFS_I(inode); 534 struct xfs_inode *ip = XFS_I(inode);
535 struct xfs_mount *mp = ip->i_mount; 535 struct xfs_mount *mp = ip->i_mount;
536 void __user *arg = (void __user *)p; 536 void __user *arg = (void __user *)p;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
311} 311}
312 312
313/* 313/*
314 * Determine the initial size of the preallocation. We are beyond the current
315 * EOF here, but we need to take into account whether this is a sparse write or
316 * an extending write when determining the preallocation size. Hence we need to
317 * look up the extent that ends at the current write offset and use the result
318 * to determine the preallocation size.
319 *
320 * If the extent is a hole, then preallocation is essentially disabled.
 321 * Otherwise we take the size of the preceding data extent as the basis for the
322 * preallocation size. If the size of the extent is greater than half the
323 * maximum extent length, then use the current offset as the basis. This ensures
324 * that for large files the preallocation size always extends to MAXEXTLEN
325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents.
327 */
328STATIC int
329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp,
331 struct xfs_inode *ip,
332 xfs_off_t offset,
333 xfs_bmbt_irec_t *imap,
334 int nimaps)
335{
336 xfs_fileoff_t start_fsb;
337 int imaps = 1;
338 int error;
339
340 ASSERT(nimaps >= imaps);
341
342 /* if we are using a specific prealloc size, return now */
343 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
344 return 0;
345
346 /*
347 * As we write multiple pages, the offset will always align to the
348 * start of a page and hence point to a hole at EOF. i.e. if the size is
349 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
350 * will return FSB 1. Hence if there are blocks in the file, we want to
351 * point to the block prior to the EOF block and not the hole that maps
352 * directly at @offset.
353 */
354 start_fsb = XFS_B_TO_FSB(mp, offset);
355 if (start_fsb)
356 start_fsb--;
357 error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
358 if (error)
359 return 0;
360
361 ASSERT(imaps == 1);
362 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount;
366 return XFS_B_TO_FSB(mp, offset);
367}
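The decision tree in xfs_iomap_eof_prealloc_initial_size() reduces to three cases: no preallocation after a hole, the preceding extent's length for ordinary extending writes, and the current offset once the file is already large. A toy model of just that decision, in plain C with an assumed 21-bit MAXEXTLEN (the real function works on an xfs_bmbt_irec looked up via xfs_bmapi_read()):

	#include <stdint.h>

	#define TOY_MAXEXTLEN	((uint64_t)0x001fffff)	/* assumed 21-bit extent length limit */

	/* prev_extent_len is the length, in blocks, of the data extent ending at
	 * the write offset; 0 means the write lands after a hole. */
	static uint64_t toy_initial_prealloc(uint64_t prev_extent_len,
					     uint64_t offset_fsb)
	{
		if (prev_extent_len == 0)
			return 0;			/* sparse write: no prealloc */
		if (prev_extent_len <= TOY_MAXEXTLEN / 2)
			return prev_extent_len;		/* base on the preceding extent */
		return offset_fsb;			/* large file: base on the offset */
	}

	/* toy_initial_prealloc(0, 4096) == 0, toy_initial_prealloc(256, 4096) == 256,
	 * toy_initial_prealloc(0x00100000, 0x00200000) == 0x00200000 */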
368
369/*
314 * If we don't have a user specified preallocation size, dynamically increase 370 * If we don't have a user specified preallocation size, dynamically increase
315 * the preallocation size as the size of the file grows. Cap the maximum size 371 * the preallocation size as the size of the file grows. Cap the maximum size
316 * at a single extent or less if the filesystem is near full. The closer the 372 * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
319STATIC xfs_fsblock_t 375STATIC xfs_fsblock_t
320xfs_iomap_prealloc_size( 376xfs_iomap_prealloc_size(
321 struct xfs_mount *mp, 377 struct xfs_mount *mp,
322 struct xfs_inode *ip) 378 struct xfs_inode *ip,
379 xfs_off_t offset,
380 struct xfs_bmbt_irec *imap,
381 int nimaps)
323{ 382{
324 xfs_fsblock_t alloc_blocks = 0; 383 xfs_fsblock_t alloc_blocks = 0;
325 384
326 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps);
387 if (alloc_blocks > 0) {
327 int shift = 0; 388 int shift = 0;
328 int64_t freesp; 389 int64_t freesp;
329 390
330 /*
331 * rounddown_pow_of_two() returns an undefined result
332 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
333 * ensure we always pass in a non-zero value.
334 */
335 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
336 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
337 rounddown_pow_of_two(alloc_blocks)); 392 rounddown_pow_of_two(alloc_blocks));
338 393
@@ -351,6 +406,15 @@ xfs_iomap_prealloc_size(
351 } 406 }
352 if (shift) 407 if (shift)
353 alloc_blocks >>= shift; 408 alloc_blocks >>= shift;
409
410 /*
411 * If we are still trying to allocate more space than is
412 * available, squash the prealloc hard. This can happen if we
413 * have a large file on a small filesystem and the above
414 * lowspace thresholds are smaller than MAXEXTLEN.
415 */
416 while (alloc_blocks >= freesp)
417 alloc_blocks >>= 4;
354 } 418 }
355 419
356 if (alloc_blocks < mp->m_writeio_blocks) 420 if (alloc_blocks < mp->m_writeio_blocks)
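The new while loop above keeps dividing the requested preallocation by 16 until it drops below the remaining free space, so even a pathological request converges in a handful of iterations. A standalone sketch of the same squash, with illustrative numbers:

	#include <stdint.h>

	static uint64_t squash_prealloc(uint64_t alloc_blocks, uint64_t freesp)
	{
		/* same shape as the loop above: shrink by 16x until it fits */
		while (alloc_blocks >= freesp)
			alloc_blocks >>= 4;
		return alloc_blocks;
	}

	/* squash_prealloc(1048576, 10000): 1048576 -> 65536 -> 4096, so 4096 blocks
	 * end up preallocated on a filesystem with only 10000 free blocks. */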
@@ -390,7 +454,6 @@ xfs_iomap_write_delay(
390 extsz = xfs_get_extsz_hint(ip); 454 extsz = xfs_get_extsz_hint(ip);
391 offset_fsb = XFS_B_TO_FSBT(mp, offset); 455 offset_fsb = XFS_B_TO_FSBT(mp, offset);
392 456
393
394 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 457 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
395 imap, XFS_WRITE_IMAPS, &prealloc); 458 imap, XFS_WRITE_IMAPS, &prealloc);
396 if (error) 459 if (error)
@@ -398,7 +461,10 @@ xfs_iomap_write_delay(
398 461
399retry: 462retry:
400 if (prealloc) { 463 if (prealloc) {
401 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); 464 xfs_fsblock_t alloc_blocks;
465
466 alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
467 XFS_WRITE_IMAPS);
402 468
403 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 469 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
404 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 470 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
120 struct xlog *log, 120 struct xlog *log,
121 struct xlog_in_core *iclog, 121 struct xlog_in_core *iclog,
122 int count, 122 int count,
123 boolean_t syncing); 123 bool syncing);
124STATIC void 124STATIC void
125xlog_verify_tail_lsn( 125xlog_verify_tail_lsn(
126 struct xlog *log, 126 struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1739 1739
1740 xlog_verify_iclog(log, iclog, count, B_TRUE); 1740 xlog_verify_iclog(log, iclog, count, true);
1741 1741
1742 /* account for log which doesn't start at block #0 */ 1742 /* account for log which doesn't start at block #0 */
1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
3611 struct xlog *log, 3611 struct xlog *log,
3612 struct xlog_in_core *iclog, 3612 struct xlog_in_core *iclog,
3613 int count, 3613 int count,
3614 boolean_t syncing) 3614 bool syncing)
3615{ 3615{
3616 xlog_op_header_t *ophead; 3616 xlog_op_header_t *ophead;
3617 xlog_in_core_t *icptr; 3617 xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
3659 /* clientid is only 1 byte */ 3659 /* clientid is only 1 byte */
3660 field_offset = (__psint_t) 3660 field_offset = (__psint_t)
3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
3662 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3662 if (!syncing || (field_offset & 0x1ff)) {
3663 clientid = ophead->oh_clientid; 3663 clientid = ophead->oh_clientid;
3664 } else { 3664 } else {
3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
3682 /* check length */ 3682 /* check length */
3683 field_offset = (__psint_t) 3683 field_offset = (__psint_t)
3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
3685 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3685 if (!syncing || (field_offset & 0x1ff)) {
3686 op_len = be32_to_cpu(ophead->oh_len); 3686 op_len = be32_to_cpu(ophead->oh_len);
3687 } else { 3687 } else {
3688 idx = BTOBBT((__psint_t)&ophead->oh_len - 3688 idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb85ff83..d1dba7ce75ae 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1442,9 +1442,8 @@ xlog_recover_find_tid(
1442 xlog_tid_t tid) 1442 xlog_tid_t tid)
1443{ 1443{
1444 xlog_recover_t *trans; 1444 xlog_recover_t *trans;
1445 struct hlist_node *n;
1446 1445
1447 hlist_for_each_entry(trans, n, head, r_list) { 1446 hlist_for_each_entry(trans, head, r_list) {
1448 if (trans->r_log_tid == tid) 1447 if (trans->r_log_tid == tid)
1449 return trans; 1448 return trans;
1450 } 1449 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
658 return; 658 return;
659 } 659 }
660 /* quietly fail */ 660 /* quietly fail */
661 xfs_buf_ioerror(bp, EFSCORRUPTED); 661 xfs_buf_ioerror(bp, EWRONGFS);
662} 662}
663 663
664static void 664static void
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
1109 return 0; 1109 return 0;
1110 1110
1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1112 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1112 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1113 XFS_DEFAULT_LOG_COUNT); 1113 0, 0, XFS_DEFAULT_LOG_COUNT);
1114 if (error) { 1114 if (error) {
1115 xfs_trans_cancel(tp, 0); 1115 xfs_trans_cancel(tp, 0);
1116 xfs_alert(mp, "%s: Superblock update failed!", __func__); 1116 xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
1583 return 0; 1583 return 0;
1584 1584
1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1586 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1586 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1587 XFS_DEFAULT_LOG_COUNT); 1587 XFS_DEFAULT_LOG_COUNT);
1588 if (error) { 1588 if (error) {
1589 xfs_trans_cancel(tp, 0); 1589 xfs_trans_cancel(tp, 0);
1590 return error; 1590 return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
1945 XFS_SB_VERSIONNUM)); 1945 XFS_SB_VERSIONNUM));
1946 1946
1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1948 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1948 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1949 XFS_DEFAULT_LOG_COUNT); 1949 XFS_DEFAULT_LOG_COUNT);
1950 if (error) { 1950 if (error) {
1951 xfs_trans_cancel(tp, 0); 1951 xfs_trans_cancel(tp, 0);
1952 return error; 1952 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
34 uint tr_addafork; /* cvt inode to attributed trans */ 34 uint tr_addafork; /* cvt inode to attributed trans */
35 uint tr_writeid; /* write setuid/setgid file */ 35 uint tr_writeid; /* write setuid/setgid file */
36 uint tr_attrinval; /* attr fork buffer invalidation */ 36 uint tr_attrinval; /* attr fork buffer invalidation */
37 uint tr_attrset; /* set/create an attribute */ 37 uint tr_attrsetm; /* set/create an attribute at mount time */
38 uint tr_attrsetrt; /* set/create an attribute at runtime */
38 uint tr_attrrm; /* remove an attribute */ 39 uint tr_attrrm; /* remove an attribute */
39 uint tr_clearagi; /* clear bad agi unlinked ino bucket */ 40 uint tr_clearagi; /* clear bad agi unlinked ino bucket */
40 uint tr_growrtalloc; /* grow realtime allocations */ 41 uint tr_growrtalloc; /* grow realtime allocations */
41 uint tr_growrtzero; /* grow realtime zeroing */ 42 uint tr_growrtzero; /* grow realtime zeroing */
42 uint tr_growrtfree; /* grow realtime freeing */ 43 uint tr_growrtfree; /* grow realtime freeing */
44 uint tr_qm_sbchange; /* change quota flags */
45 uint tr_qm_setqlim; /* adjust quota limits */
46 uint tr_qm_dqalloc; /* allocate quota on disk */
47 uint tr_qm_quotaoff; /* turn quota off */
48 uint tr_qm_equotaoff;/* end of turn quota off */
49 uint tr_sb; /* modify superblock */
43} xfs_trans_reservations_t; 50} xfs_trans_reservations_t;
44 51
45#ifndef __KERNEL__ 52#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
1584 int error; 1584 int error;
1585 1585
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 if ((error = xfs_trans_reserve(tp, 0, 1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 mp->m_sb.sb_sectsize + 128, 0, 1588 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 0, 1589 if (error) {
1590 XFS_DEFAULT_LOG_COUNT))) {
1591 xfs_trans_cancel(tp, 0); 1590 xfs_trans_cancel(tp, 0);
1592 return error; 1591 return error;
1593 } 1592 }
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
146 * inode goes inactive and wants to free blocks, 146 * inode goes inactive and wants to free blocks,
147 * or via xfs_log_mount_finish. 147 * or via xfs_log_mount_finish.
148 */ 148 */
149 *needquotamount = B_TRUE; 149 *needquotamount = true;
150 *quotaflags = mp->m_qflags; 150 *quotaflags = mp->m_qflags;
151 mp->m_qflags = 0; 151 mp->m_qflags = 0;
152 } 152 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5f53e75409b8..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
408{ 408{
409 struct xfs_quotainfo *q = mp->m_quotainfo; 409 struct xfs_quotainfo *q = mp->m_quotainfo;
410 struct xfs_inode *uip, *gip; 410 struct xfs_inode *uip, *gip;
411 boolean_t tempuqip, tempgqip; 411 bool tempuqip, tempgqip;
412 412
413 uip = gip = NULL; 413 uip = gip = NULL;
414 tempuqip = tempgqip = B_FALSE; 414 tempuqip = tempgqip = false;
415 memset(out, 0, sizeof(fs_quota_stat_t)); 415 memset(out, 0, sizeof(fs_quota_stat_t));
416 416
417 out->qs_version = FS_QSTAT_VERSION; 417 out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
436 0, 0, &uip) == 0) 436 0, 0, &uip) == 0)
437 tempuqip = B_TRUE; 437 tempuqip = true;
438 } 438 }
439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
441 0, 0, &gip) == 0) 441 0, 0, &gip) == 0)
442 tempgqip = B_TRUE; 442 tempgqip = true;
443 } 443 }
444 if (uip) { 444 if (uip) {
445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT))) { 494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
495 xfs_trans_cancel(tp, 0); 496 xfs_trans_cancel(tp, 0);
496 return (error); 497 return (error);
497 } 498 }
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
638 639
639 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 640 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
640 641
641 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, 642 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
642 0, 0, XFS_DEFAULT_LOG_COUNT))) { 643 0, 0, XFS_DEFAULT_LOG_COUNT);
644 if (error) {
643 xfs_trans_cancel(tp, 0); 645 xfs_trans_cancel(tp, 0);
644 return (error); 646 return (error);
645 } 647 }
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
671 uint oldsbqflag=0; 673 uint oldsbqflag=0;
672 674
673 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 675 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
674 if ((error = xfs_trans_reserve(tp, 0, 676 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
675 sizeof(xfs_qoff_logitem_t) * 2 + 677 0, 0, XFS_DEFAULT_LOG_COUNT);
676 mp->m_sb.sb_sectsize + 128, 678 if (error)
677 0,
678 0,
679 XFS_DEFAULT_LOG_COUNT))) {
680 goto error0; 679 goto error0;
681 }
682 680
683 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 681 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
684 xfs_trans_log_quotaoff_item(tp, qoffi); 682 xfs_trans_log_quotaoff_item(tp, qoffi);
@@ -784,11 +782,11 @@ xfs_qm_scall_getquota(
784 (XFS_IS_OQUOTA_ENFORCED(mp) && 782 (XFS_IS_OQUOTA_ENFORCED(mp) &&
785 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && 783 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
786 dst->d_id != 0) { 784 dst->d_id != 0) {
787 if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && 785 if ((dst->d_bcount > dst->d_blk_softlimit) &&
788 (dst->d_blk_softlimit > 0)) { 786 (dst->d_blk_softlimit > 0)) {
789 ASSERT(dst->d_btimer != 0); 787 ASSERT(dst->d_btimer != 0);
790 } 788 }
791 if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && 789 if ((dst->d_icount > dst->d_ino_softlimit) &&
792 (dst->d_ino_softlimit > 0)) { 790 (dst->d_ino_softlimit > 0)) {
793 ASSERT(dst->d_itimer != 0); 791 ASSERT(dst->d_itimer != 0);
794 } 792 }
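The casts dropped above truncated the 64-bit block and inode counts to int before comparing them against the soft limits, so a count past 2^31 went negative and the assert never saw an exceeded limit. A small demonstration of the truncation, assuming a 32-bit int on a two's-complement target:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bcount = 3ULL * 1024 * 1024 * 1024;	/* ~3 billion blocks used */
		uint64_t softlimit = 1024;			/* tiny soft limit */

		/* old comparison: both sides squeezed into int, bcount wraps negative */
		printf("old: %d\n", (int)bcount > (int)softlimit);	/* prints 0 */
		/* new comparison: full-width unsigned compare */
		printf("new: %d\n", bcount > softlimit);		/* prints 1 */
		return 0;
	}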
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
139 139
140 140
141STATIC unsigned long 141STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 142suffix_kstrtoint(char *s, unsigned int base, int *res)
143{ 143{
144 int last, shift_left_factor = 0; 144 int last, shift_left_factor = 0, _res;
145 char *value = s; 145 char *value = s;
146 146
147 last = strlen(value) - 1; 147 last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
158 value[last] = '\0'; 158 value[last] = '\0';
159 } 159 }
160 160
161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 if (kstrtoint(s, base, &_res))
162 return -EINVAL;
163 *res = _res << shift_left_factor;
164 return 0;
162} 165}
163 166
164/* 167/*
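suffix_kstrtoint() keeps the old behaviour of accepting a size suffix on options like logbsize and allocsize, but reports parse failures instead of silently using whatever simple_strtoul produced. A userspace sketch of the same idea; the k/m/g-to-shift mapping is an assumption about the elided middle of the function, and strtol stands in for kstrtoint:

	#include <ctype.h>
	#include <stdlib.h>
	#include <string.h>

	static int parse_size_opt(const char *s, int *res)
	{
		char buf[32];
		size_t len = strlen(s);
		int shift = 0;

		if (len == 0 || len >= sizeof(buf))
			return -1;
		switch (tolower((unsigned char)s[len - 1])) {
		case 'k': shift = 10; len--; break;
		case 'm': shift = 20; len--; break;
		case 'g': shift = 30; len--; break;
		}
		memcpy(buf, s, len);
		buf[len] = '\0';
		*res = (int)strtol(buf, NULL, 10) << shift;
		return 0;
	}

	/* parse_size_opt("32k", &v) leaves v == 32768;
	 * parse_size_opt("64m", &v) leaves v == 67108864 */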
@@ -174,7 +177,7 @@ xfs_parseargs(
174 char *options) 177 char *options)
175{ 178{
176 struct super_block *sb = mp->m_super; 179 struct super_block *sb = mp->m_super;
177 char *this_char, *value, *eov; 180 char *this_char, *value;
178 int dsunit = 0; 181 int dsunit = 0;
179 int dswidth = 0; 182 int dswidth = 0;
180 int iosize = 0; 183 int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
230 this_char); 233 this_char);
231 return EINVAL; 234 return EINVAL;
232 } 235 }
233 mp->m_logbufs = simple_strtoul(value, &eov, 10); 236 if (kstrtoint(value, 10, &mp->m_logbufs))
237 return EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 238 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 239 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 240 xfs_warn(mp, "%s option requires an argument",
237 this_char); 241 this_char);
238 return EINVAL; 242 return EINVAL;
239 } 243 }
240 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 244 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
245 return EINVAL;
241 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 246 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
242 if (!value || !*value) { 247 if (!value || !*value) {
243 xfs_warn(mp, "%s option requires an argument", 248 xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
266 this_char); 271 this_char);
267 return EINVAL; 272 return EINVAL;
268 } 273 }
269 iosize = simple_strtoul(value, &eov, 10); 274 if (kstrtoint(value, 10, &iosize))
275 return EINVAL;
270 iosizelog = ffs(iosize) - 1; 276 iosizelog = ffs(iosize) - 1;
271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 277 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
272 if (!value || !*value) { 278 if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
274 this_char); 280 this_char);
275 return EINVAL; 281 return EINVAL;
276 } 282 }
277 iosize = suffix_strtoul(value, &eov, 10); 283 if (suffix_kstrtoint(value, 10, &iosize))
284 return EINVAL;
278 iosizelog = ffs(iosize) - 1; 285 iosizelog = ffs(iosize) - 1;
279 } else if (!strcmp(this_char, MNTOPT_GRPID) || 286 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
280 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 287 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
296 this_char); 303 this_char);
297 return EINVAL; 304 return EINVAL;
298 } 305 }
299 dsunit = simple_strtoul(value, &eov, 10); 306 if (kstrtoint(value, 10, &dsunit))
307 return EINVAL;
300 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 308 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
301 if (!value || !*value) { 309 if (!value || !*value) {
302 xfs_warn(mp, "%s option requires an argument", 310 xfs_warn(mp, "%s option requires an argument",
303 this_char); 311 this_char);
304 return EINVAL; 312 return EINVAL;
305 } 313 }
306 dswidth = simple_strtoul(value, &eov, 10); 314 if (kstrtoint(value, 10, &dswidth))
315 return EINVAL;
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 316 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 317 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 318 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
341DEFINE_BUF_EVENT(xfs_buf_item_iodone); 341DEFINE_BUF_EVENT(xfs_buf_item_iodone);
342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
343DEFINE_BUF_EVENT(xfs_buf_error_relse); 343DEFINE_BUF_EVENT(xfs_buf_error_relse);
344DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
344DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 345DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
345DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 346DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
346 347
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
37#include "xfs_extent_busy.h" 37#include "xfs_extent_busy.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_qm.h"
40#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
41#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
42#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_log_priv.h"
45#include "xfs_buf_item.h"
43#include "xfs_trace.h" 46#include "xfs_trace.h"
44 47
45kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
46kmem_zone_t *xfs_log_item_desc_zone; 49kmem_zone_t *xfs_log_item_desc_zone;
47 50
51/*
52 * A buffer has a format structure overhead in the log in addition
53 * to the data, so we need to take this into account when reserving
54 * space in a transaction for a buffer. Round the space required up
55 * to a multiple of 128 bytes so that we don't change the historical
56 * reservation that has been used for this overhead.
57 */
58STATIC uint
59xfs_buf_log_overhead(void)
60{
61 return round_up(sizeof(struct xlog_op_header) +
62 sizeof(struct xfs_buf_log_format), 128);
63}
64
65/*
66 * Calculate out transaction log reservation per item in bytes.
67 *
68 * The nbufs argument is used to indicate the number of items that
69 * will be changed in a transaction. size is used to tell how many
70 * bytes should be reserved per item.
71 */
72STATIC uint
73xfs_calc_buf_res(
74 uint nbufs,
75 uint size)
76{
77 return nbufs * (size + xfs_buf_log_overhead());
78}
48 79
49/* 80/*
50 * Various log reservation values. 81 * Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
85 struct xfs_mount *mp) 116 struct xfs_mount *mp)
86{ 117{
87 return XFS_DQUOT_LOGRES(mp) + 118 return XFS_DQUOT_LOGRES(mp) +
88 MAX((mp->m_sb.sb_inodesize + 119 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
89 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
90 2 * mp->m_sb.sb_sectsize + 121 XFS_FSB_TO_B(mp, 1)) +
91 mp->m_sb.sb_sectsize + 122 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
92 XFS_ALLOCFREE_LOG_RES(mp, 2) + 123 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
93 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 124 XFS_FSB_TO_B(mp, 1))),
94 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 125 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
95 (2 * mp->m_sb.sb_sectsize + 126 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
96 2 * mp->m_sb.sb_sectsize + 127 XFS_FSB_TO_B(mp, 1))));
97 mp->m_sb.sb_sectsize +
98 XFS_ALLOCFREE_LOG_RES(mp, 2) +
99 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
100} 128}
101 129
102/* 130/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
117 struct xfs_mount *mp) 145 struct xfs_mount *mp)
118{ 146{
119 return XFS_DQUOT_LOGRES(mp) + 147 return XFS_DQUOT_LOGRES(mp) +
120 MAX((mp->m_sb.sb_inodesize + 148 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
121 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 149 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
122 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 150 XFS_FSB_TO_B(mp, 1))),
123 (4 * mp->m_sb.sb_sectsize + 151 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
124 4 * mp->m_sb.sb_sectsize + 152 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
125 mp->m_sb.sb_sectsize + 153 XFS_FSB_TO_B(mp, 1)) +
126 XFS_ALLOCFREE_LOG_RES(mp, 4) + 154 xfs_calc_buf_res(5, 0) +
127 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 155 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
128 128 * 5 + 156 XFS_FSB_TO_B(mp, 1)) +
129 XFS_ALLOCFREE_LOG_RES(mp, 1) + 157 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
130 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 158 mp->m_in_maxlevels, 0)));
131 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
132} 159}
133 160
134/* 161/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
148 struct xfs_mount *mp) 175 struct xfs_mount *mp)
149{ 176{
150 return XFS_DQUOT_LOGRES(mp) + 177 return XFS_DQUOT_LOGRES(mp) +
151 MAX((4 * mp->m_sb.sb_inodesize + 178 MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
152 2 * XFS_DIROP_LOG_RES(mp) + 179 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
153 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 180 XFS_FSB_TO_B(mp, 1))),
154 (3 * mp->m_sb.sb_sectsize + 181 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
155 3 * mp->m_sb.sb_sectsize + 182 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
156 mp->m_sb.sb_sectsize + 183 XFS_FSB_TO_B(mp, 1))));
157 XFS_ALLOCFREE_LOG_RES(mp, 3) +
158 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
159} 184}
160 185
161/* 186/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
175 struct xfs_mount *mp) 200 struct xfs_mount *mp)
176{ 201{
177 return XFS_DQUOT_LOGRES(mp) + 202 return XFS_DQUOT_LOGRES(mp) +
178 MAX((mp->m_sb.sb_inodesize + 203 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
179 mp->m_sb.sb_inodesize + 204 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
180 XFS_DIROP_LOG_RES(mp) + 205 XFS_FSB_TO_B(mp, 1))),
181 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 206 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
182 (mp->m_sb.sb_sectsize + 207 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
183 mp->m_sb.sb_sectsize + 208 XFS_FSB_TO_B(mp, 1))));
184 mp->m_sb.sb_sectsize +
185 XFS_ALLOCFREE_LOG_RES(mp, 1) +
186 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
187} 209}
188 210
189/* 211/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
203 struct xfs_mount *mp) 225 struct xfs_mount *mp)
204{ 226{
205 return XFS_DQUOT_LOGRES(mp) + 227 return XFS_DQUOT_LOGRES(mp) +
206 MAX((mp->m_sb.sb_inodesize + 228 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
207 mp->m_sb.sb_inodesize + 229 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
208 XFS_DIROP_LOG_RES(mp) + 230 XFS_FSB_TO_B(mp, 1))),
209 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 231 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
210 (2 * mp->m_sb.sb_sectsize + 232 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
211 2 * mp->m_sb.sb_sectsize + 233 XFS_FSB_TO_B(mp, 1))));
212 mp->m_sb.sb_sectsize +
213 XFS_ALLOCFREE_LOG_RES(mp, 2) +
214 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
215} 234}
216 235
217/* 236/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
233 struct xfs_mount *mp) 252 struct xfs_mount *mp)
234{ 253{
235 return XFS_DQUOT_LOGRES(mp) + 254 return XFS_DQUOT_LOGRES(mp) +
236 MAX((mp->m_sb.sb_inodesize + 255 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
237 mp->m_sb.sb_inodesize + 256 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
238 XFS_FSB_TO_B(mp, 1) + 257 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
239 XFS_DIROP_LOG_RES(mp) + 258 XFS_FSB_TO_B(mp, 1)) +
240 1024 + 259 xfs_calc_buf_res(1, 1024)),
241 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 260 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
242 (2 * mp->m_sb.sb_sectsize + 261 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
243 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 262 XFS_FSB_TO_B(mp, 1)) +
244 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 263 xfs_calc_buf_res(mp->m_in_maxlevels,
245 XFS_ALLOCFREE_LOG_RES(mp, 1) + 264 XFS_FSB_TO_B(mp, 1)) +
246 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 265 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
247 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 266 XFS_FSB_TO_B(mp, 1))));
248} 267}
249 268
250/* 269/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
267 struct xfs_mount *mp) 286 struct xfs_mount *mp)
268{ 287{
269 return XFS_DQUOT_LOGRES(mp) + 288 return XFS_DQUOT_LOGRES(mp) +
270 MAX((mp->m_sb.sb_inodesize + 289 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
271 mp->m_sb.sb_inodesize + 290 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 (uint)XFS_FSB_TO_B(mp, 1) +
292 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
293 XFS_FSB_TO_B(mp, 1))),
294 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
272 mp->m_sb.sb_sectsize + 295 mp->m_sb.sb_sectsize +
273 XFS_FSB_TO_B(mp, 1) + 296 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
274 XFS_DIROP_LOG_RES(mp) + 297 XFS_FSB_TO_B(mp, 1)) +
275 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 298 xfs_calc_buf_res(mp->m_in_maxlevels,
276 (3 * mp->m_sb.sb_sectsize + 299 XFS_FSB_TO_B(mp, 1)) +
277 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 300 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
278 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 301 XFS_FSB_TO_B(mp, 1))));
279 XFS_ALLOCFREE_LOG_RES(mp, 1) +
280 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
281 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
282} 302}
283 303
284/* 304/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
306 struct xfs_mount *mp) 326 struct xfs_mount *mp)
307{ 327{
308 return XFS_DQUOT_LOGRES(mp) + 328 return XFS_DQUOT_LOGRES(mp) +
309 mp->m_sb.sb_inodesize + 329 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
310 mp->m_sb.sb_sectsize + 330 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
311 mp->m_sb.sb_sectsize + 331 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
312 XFS_FSB_TO_B(mp, 1) +
313 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 332 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
314 XFS_INODE_CLUSTER_SIZE(mp)) + 333 XFS_INODE_CLUSTER_SIZE(mp)) +
315 128 * 5 + 334 xfs_calc_buf_res(1, 0) +
316 XFS_ALLOCFREE_LOG_RES(mp, 1) + 335 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
317 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 336 mp->m_in_maxlevels, 0) +
318 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 337 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
338 XFS_FSB_TO_B(mp, 1));
319} 339}
320 340
321/* 341/*
@@ -343,9 +363,9 @@ STATIC uint
343xfs_calc_growdata_reservation( 363xfs_calc_growdata_reservation(
344 struct xfs_mount *mp) 364 struct xfs_mount *mp)
345{ 365{
346 return mp->m_sb.sb_sectsize * 3 + 366 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
347 XFS_ALLOCFREE_LOG_RES(mp, 1) + 367 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
348 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 368 XFS_FSB_TO_B(mp, 1));
349} 369}
350 370
351/* 371/*
@@ -362,12 +382,12 @@ STATIC uint
362xfs_calc_growrtalloc_reservation( 382xfs_calc_growrtalloc_reservation(
363 struct xfs_mount *mp) 383 struct xfs_mount *mp)
364{ 384{
365 return 2 * mp->m_sb.sb_sectsize + 385 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
366 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 386 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
367 mp->m_sb.sb_inodesize + 387 XFS_FSB_TO_B(mp, 1)) +
368 XFS_ALLOCFREE_LOG_RES(mp, 1) + 388 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
369 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 389 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
370 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 390 XFS_FSB_TO_B(mp, 1));
371} 391}
372 392
373/* 393/*
@@ -379,7 +399,7 @@ STATIC uint
379xfs_calc_growrtzero_reservation( 399xfs_calc_growrtzero_reservation(
380 struct xfs_mount *mp) 400 struct xfs_mount *mp)
381{ 401{
382 return mp->m_sb.sb_blocksize + 128; 402 return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
383} 403}
384 404
385/* 405/*
@@ -396,11 +416,10 @@ STATIC uint
396xfs_calc_growrtfree_reservation( 416xfs_calc_growrtfree_reservation(
397 struct xfs_mount *mp) 417 struct xfs_mount *mp)
398{ 418{
399 return mp->m_sb.sb_sectsize + 419 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
400 2 * mp->m_sb.sb_inodesize + 420 xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
401 mp->m_sb.sb_blocksize + 421 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
402 mp->m_rsumsize + 422 xfs_calc_buf_res(1, mp->m_rsumsize);
403 128 * 5;
404} 423}
405 424
406/* 425/*
@@ -411,7 +430,7 @@ STATIC uint
411xfs_calc_swrite_reservation( 430xfs_calc_swrite_reservation(
412 struct xfs_mount *mp) 431 struct xfs_mount *mp)
413{ 432{
414 return mp->m_sb.sb_inodesize + 128; 433 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
415} 434}
416 435
417/* 436/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
421STATIC uint 440STATIC uint
422xfs_calc_writeid_reservation(xfs_mount_t *mp) 441xfs_calc_writeid_reservation(xfs_mount_t *mp)
423{ 442{
424 return mp->m_sb.sb_inodesize + 128; 443 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
425} 444}
426 445
427/* 446/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
437 struct xfs_mount *mp) 456 struct xfs_mount *mp)
438{ 457{
439 return XFS_DQUOT_LOGRES(mp) + 458 return XFS_DQUOT_LOGRES(mp) +
440 mp->m_sb.sb_inodesize + 459 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
441 mp->m_sb.sb_sectsize * 2 + 460 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
442 mp->m_dirblksize + 461 xfs_calc_buf_res(1, mp->m_dirblksize) +
443 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 462 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
444 XFS_ALLOCFREE_LOG_RES(mp, 1) + 463 XFS_FSB_TO_B(mp, 1)) +
445 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 464 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
446 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 465 XFS_FSB_TO_B(mp, 1));
447} 466}
448 467
449/* 468/*
@@ -461,35 +480,51 @@ STATIC uint
461xfs_calc_attrinval_reservation( 480xfs_calc_attrinval_reservation(
462 struct xfs_mount *mp) 481 struct xfs_mount *mp)
463{ 482{
464 return MAX((mp->m_sb.sb_inodesize + 483 return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
465 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 484 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
466 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 485 XFS_FSB_TO_B(mp, 1))),
467 (4 * mp->m_sb.sb_sectsize + 486 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
468 4 * mp->m_sb.sb_sectsize + 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
469 mp->m_sb.sb_sectsize + 488 XFS_FSB_TO_B(mp, 1))));
470 XFS_ALLOCFREE_LOG_RES(mp, 4) +
471 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
472} 489}
473 490
474/* 491/*
475 * Setting an attribute. 492 * Setting an attribute at mount time.
476 * the inode getting the attribute 493 * the inode getting the attribute
477 * the superblock for allocations 494 * the superblock for allocations
478 * the agfs extents are allocated from 495 * the agfs extents are allocated from
479 * the attribute btree * max depth 496 * the attribute btree * max depth
480 * the inode allocation btree 497 * the inode allocation btree
481 * Since attribute transaction space is dependent on the size of the attribute, 498 * Since attribute transaction space is dependent on the size of the attribute,
 482 * the calculation is done partially at mount time and partially at runtime. 499 * the calculation is done partially at mount time and partially at runtime (see
500 * below).
483 */ 501 */
484STATIC uint 502STATIC uint
485xfs_calc_attrset_reservation( 503xfs_calc_attrsetm_reservation(
486 struct xfs_mount *mp) 504 struct xfs_mount *mp)
487{ 505{
488 return XFS_DQUOT_LOGRES(mp) + 506 return XFS_DQUOT_LOGRES(mp) +
489 mp->m_sb.sb_inodesize + 507 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
490 mp->m_sb.sb_sectsize + 508 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
491 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 509 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
492 128 * (2 + XFS_DA_NODE_MAXDEPTH); 510}
511
512/*
513 * Setting an attribute at runtime, transaction space unit per block.
514 * the superblock for allocations: sector size
515 * the inode bmap btree could join or split: max depth * block size
516 * Since the runtime attribute transaction space is dependent on the total
517 * blocks needed for the 1st bmap, here we calculate out the space unit for
518 * one block so that the caller could figure out the total space according
 519 * to the attribute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
520 */
521STATIC uint
522xfs_calc_attrsetrt_reservation(
523 struct xfs_mount *mp)
524{
525 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
526 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
527 XFS_FSB_TO_B(mp, 1));
493} 528}
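Unlike the other reservations, tr_attrsetrt is a per-block unit: the mount-time part (XFS_ATTRSETM_LOG_RES) is fixed and the caller scales the runtime part by the number of blocks the attribute value needs. A sketch of how a caller might combine the two; the variable names and the surrounding transaction setup are illustrative, not the actual attr code:

	/* blks: blocks needed for the remote attribute value */
	uint	blks = XFS_B_TO_FSB(mp, valuelen);
	uint	res  = XFS_ATTRSETM_LOG_RES(mp) +
		       XFS_ATTRSETRT_LOG_RES(mp) * blks;

	error = xfs_trans_reserve(tp, blks, res, 0,
				  XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT);

	/* e.g. with a 6000-byte mount-time part and a 5000-byte per-block unit,
	 * a value spanning 4 blocks reserves 6000 + 4 * 5000 = 26000 bytes. */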
494 529
495/* 530/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
508 struct xfs_mount *mp) 543 struct xfs_mount *mp)
509{ 544{
510 return XFS_DQUOT_LOGRES(mp) + 545 return XFS_DQUOT_LOGRES(mp) +
511 MAX((mp->m_sb.sb_inodesize + 546 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
512 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 547 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
513 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 548 XFS_FSB_TO_B(mp, 1)) +
514 128 * (1 + XFS_DA_NODE_MAXDEPTH + 549 (uint)XFS_FSB_TO_B(mp,
515 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 550 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
516 (2 * mp->m_sb.sb_sectsize + 551 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
517 2 * mp->m_sb.sb_sectsize + 552 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
518 mp->m_sb.sb_sectsize + 553 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
519 XFS_ALLOCFREE_LOG_RES(mp, 2) + 554 XFS_FSB_TO_B(mp, 1))));
520 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
521} 555}
522 556
523/* 557/*
@@ -527,7 +561,78 @@ STATIC uint
527xfs_calc_clear_agi_bucket_reservation( 561xfs_calc_clear_agi_bucket_reservation(
528 struct xfs_mount *mp) 562 struct xfs_mount *mp)
529{ 563{
530 return mp->m_sb.sb_sectsize + 128; 564 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
565}
566
567/*
568 * Clearing the quotaflags in the superblock.
569 * the super block for changing quota flags: sector size
570 */
571STATIC uint
572xfs_calc_qm_sbchange_reservation(
573 struct xfs_mount *mp)
574{
575 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
576}
577
578/*
579 * Adjusting quota limits.
580 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
581 */
582STATIC uint
583xfs_calc_qm_setqlim_reservation(
584 struct xfs_mount *mp)
585{
586 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
587}
588
589/*
590 * Allocating quota on disk if needed.
591 * the write transaction log space: XFS_WRITE_LOG_RES(mp)
592 * the unit of quota allocation: one system block size
593 */
594STATIC uint
595xfs_calc_qm_dqalloc_reservation(
596 struct xfs_mount *mp)
597{
598 return XFS_WRITE_LOG_RES(mp) +
599 xfs_calc_buf_res(1,
600 XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
601}
602
603/*
604 * Turning off quotas.
605 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
606 * the superblock for the quota flags: sector size
607 */
608STATIC uint
609xfs_calc_qm_quotaoff_reservation(
610 struct xfs_mount *mp)
611{
612 return sizeof(struct xfs_qoff_logitem) * 2 +
613 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
614}
615
616/*
617 * End of turning off quotas.
618 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
619 */
620STATIC uint
621xfs_calc_qm_quotaoff_end_reservation(
622 struct xfs_mount *mp)
623{
624 return sizeof(struct xfs_qoff_logitem) * 2;
625}
626
627/*
628 * Syncing the incore super block changes to disk.
629 * the super block to reflect the changes: sector size
630 */
631STATIC uint
632xfs_calc_sb_reservation(
633 struct xfs_mount *mp)
634{
635 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
531} 636}
532 637
533/* 638/*
@@ -555,12 +660,19 @@ xfs_trans_init(
555 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 660 resp->tr_writeid = xfs_calc_writeid_reservation(mp);
556 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 661 resp->tr_addafork = xfs_calc_addafork_reservation(mp);
557 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 662 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
558 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 663 resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
664 resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
559 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 665 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
560 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 666 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
561 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 667 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
562 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 668 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
563 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 669 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
670 resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
671 resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
672 resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
673 resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
674 resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
675 resp->tr_sb = xfs_calc_sb_reservation(mp);
564} 676}
565 677
566/* 678/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
252 * as long as SWRITE logs the entire inode core 252 * as long as SWRITE logs the entire inode core
253 */ 253 */
254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
258#define XFS_ATTRSET_LOG_RES(mp, ext) \ 258#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
259 ((mp)->m_reservations.tr_attrset + \ 259#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
260 (ext * (mp)->m_sb.sb_sectsize) + \ 260#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
261 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
262 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
263#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
264#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 261#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
265 262#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
263#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
264#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
265#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
266#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
267#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
266 268
267/* 269/*
268 * Various log count values. 270 * Various log count values.
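The new XFS_*_LOG_RES() macros above are plain lookups into that table. Two hedged call-site sketches follow; xfs_trans_reserve(), XFS_DEFAULT_LOG_COUNT, XFS_TRANS_PERM_LOG_RES and XFS_ATTRSET_LOG_COUNT are assumed from the surrounding kernel, and the variables are illustrative, none of this is introduced by the patch:

	struct xfs_trans	*tp;	/* from xfs_trans_alloc(), assumed */
	struct xfs_mount	*mp;
	uint			blks;	/* blocks covered by the attr value */
	int			error;

	/* Turning quotas off: a flat reservation, straight from the table. */
	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
				  0, 0, XFS_DEFAULT_LOG_COUNT);

	/*
	 * Setting an attribute: the old XFS_ATTRSET_LOG_RES(mp, ext) macro is
	 * gone; callers now compose a metadata part plus a per-block part.
	 */
	error = xfs_trans_reserve(tp, blks,
				  XFS_ATTRSETM_LOG_RES(mp) +
				  XFS_ATTRSETRT_LOG_RES(mp) * blks,
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ATTRSET_LOG_COUNT);
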
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
56 56
57 57
58#ifdef XFS_TRANS_DEBUG
59 /*
60 * Walk the list checking lsn ordering, and that every entry has the
61 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
62 * when specifically debugging the transaction subsystem.
63 */
64 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
65 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
66 if (&prev_lip->li_ail != &ailp->xa_ail)
67 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
68 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
69 prev_lip = lip;
70 }
71#endif /* XFS_TRANS_DEBUG */
72} 58}
73#else /* !DEBUG */ 59#else /* !DEBUG */
74#define xfs_ail_check(a,l) 60#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4fc17d479d42..3edf5dbee001 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -93,7 +93,7 @@ _xfs_trans_bjoin(
93 xfs_buf_item_init(bp, tp->t_mountp); 93 xfs_buf_item_init(bp, tp->t_mountp);
94 bip = bp->b_fspriv; 94 bip = bp->b_fspriv;
95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
96 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 96 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
98 if (reset_recur) 98 if (reset_recur)
99 bip->bli_recur = 0; 99 bip->bli_recur = 0;
@@ -432,7 +432,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
432 bip = bp->b_fspriv; 432 bip = bp->b_fspriv;
433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
435 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 435 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
436 ASSERT(atomic_read(&bip->bli_refcount) > 0); 436 ASSERT(atomic_read(&bip->bli_refcount) > 0);
437 437
438 trace_xfs_trans_brelse(bip); 438 trace_xfs_trans_brelse(bip);
@@ -519,7 +519,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
519 ASSERT(bp->b_transp == tp); 519 ASSERT(bp->b_transp == tp);
520 ASSERT(bip != NULL); 520 ASSERT(bip != NULL);
521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
522 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 522 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
523 ASSERT(atomic_read(&bip->bli_refcount) > 0); 523 ASSERT(atomic_read(&bip->bli_refcount) > 0);
524 524
525 bip->bli_flags |= XFS_BLI_HOLD; 525 bip->bli_flags |= XFS_BLI_HOLD;
@@ -539,7 +539,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
539 ASSERT(bp->b_transp == tp); 539 ASSERT(bp->b_transp == tp);
540 ASSERT(bip != NULL); 540 ASSERT(bip != NULL);
541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
542 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 542 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
543 ASSERT(atomic_read(&bip->bli_refcount) > 0); 543 ASSERT(atomic_read(&bip->bli_refcount) > 0);
544 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 544 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
545 545
@@ -598,7 +598,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
598 bip->bli_flags &= ~XFS_BLI_STALE; 598 bip->bli_flags &= ~XFS_BLI_STALE;
599 ASSERT(XFS_BUF_ISSTALE(bp)); 599 ASSERT(XFS_BUF_ISSTALE(bp));
600 XFS_BUF_UNSTALE(bp); 600 XFS_BUF_UNSTALE(bp);
601 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 601 bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
602 } 602 }
603 603
604 tp->t_flags |= XFS_TRANS_DIRTY; 604 tp->t_flags |= XFS_TRANS_DIRTY;
@@ -643,6 +643,7 @@ xfs_trans_binval(
643 xfs_buf_t *bp) 643 xfs_buf_t *bp)
644{ 644{
645 xfs_buf_log_item_t *bip = bp->b_fspriv; 645 xfs_buf_log_item_t *bip = bp->b_fspriv;
646 int i;
646 647
647 ASSERT(bp->b_transp == tp); 648 ASSERT(bp->b_transp == tp);
648 ASSERT(bip != NULL); 649 ASSERT(bip != NULL);
@@ -657,8 +658,8 @@ xfs_trans_binval(
657 */ 658 */
658 ASSERT(XFS_BUF_ISSTALE(bp)); 659 ASSERT(XFS_BUF_ISSTALE(bp));
659 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
660 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
661 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 662 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
662 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); 663 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
663 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 664 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
664 return; 665 return;
@@ -668,10 +669,12 @@ xfs_trans_binval(
668 669
669 bip->bli_flags |= XFS_BLI_STALE; 670 bip->bli_flags |= XFS_BLI_STALE;
670 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); 671 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
671 bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; 672 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
672 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 673 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
673 memset((char *)(bip->bli_format.blf_data_map), 0, 674 for (i = 0; i < bip->bli_format_count; i++) {
674 (bip->bli_format.blf_map_size * sizeof(uint))); 675 memset(bip->bli_formats[i].blf_data_map, 0,
676 (bip->bli_formats[i].blf_map_size * sizeof(uint)));
677 }
675 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; 678 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
676 tp->t_flags |= XFS_TRANS_DIRTY; 679 tp->t_flags |= XFS_TRANS_DIRTY;
677} 680}
@@ -775,5 +778,5 @@ xfs_trans_dquot_buf(
775 type == XFS_BLF_GDQUOT_BUF); 778 type == XFS_BLF_GDQUOT_BUF);
776 ASSERT(atomic_read(&bip->bli_refcount) > 0); 779 ASSERT(atomic_read(&bip->bli_refcount) > 0);
777 780
778 bip->bli_format.blf_flags |= type; 781 bip->__bli_format.blf_flags |= type;
779} 782}
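Every bli_format access in this file now goes through either the renamed __bli_format or the bli_formats[] array, which matches buffer log items carrying one xfs_buf_log_format per discontiguous buffer segment. A minimal sketch of the assumed split, inferred from the accesses in these hunks rather than shown by the patch:

	/*
	 * Item-wide state such as the cancel flag stays in the embedded first
	 * format; the assumption is that bli_formats points at __bli_format
	 * whenever the buffer has a single segment.
	 */
	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;

	/* Per-segment state, such as the dirty bitmaps, is walked as an array. */
	for (i = 0; i < bip->bli_format_count; i++)
		memset(bip->bli_formats[i].blf_data_map, 0,
		       bip->bli_formats[i].blf_map_size * sizeof(uint));
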
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
516 int i, j; 516 int i, j;
517 xfs_dquot_t *dqp; 517 xfs_dquot_t *dqp;
518 xfs_dqtrx_t *qtrx, *qa; 518 xfs_dqtrx_t *qtrx, *qa;
519 boolean_t locked; 519 bool locked;
520 520
521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) 521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
522 return; 522 return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
537 * about the number of blocks used field, or deltas. 537 * about the number of blocks used field, or deltas.
538 * Also we don't bother to zero the fields. 538 * Also we don't bother to zero the fields.
539 */ 539 */
540 locked = B_FALSE; 540 locked = false;
541 if (qtrx->qt_blk_res) { 541 if (qtrx->qt_blk_res) {
542 xfs_dqlock(dqp); 542 xfs_dqlock(dqp);
543 locked = B_TRUE; 543 locked = true;
544 dqp->q_res_bcount -= 544 dqp->q_res_bcount -=
545 (xfs_qcnt_t)qtrx->qt_blk_res; 545 (xfs_qcnt_t)qtrx->qt_blk_res;
546 } 546 }
547 if (qtrx->qt_ino_res) { 547 if (qtrx->qt_ino_res) {
548 if (!locked) { 548 if (!locked) {
549 xfs_dqlock(dqp); 549 xfs_dqlock(dqp);
550 locked = B_TRUE; 550 locked = true;
551 } 551 }
552 dqp->q_res_icount -= 552 dqp->q_res_icount -=
553 (xfs_qcnt_t)qtrx->qt_ino_res; 553 (xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
556 if (qtrx->qt_rtblk_res) { 556 if (qtrx->qt_rtblk_res) {
557 if (!locked) { 557 if (!locked) {
558 xfs_dqlock(dqp); 558 xfs_dqlock(dqp);
559 locked = B_TRUE; 559 locked = true;
560 } 560 }
561 dqp->q_res_rtbcount -= 561 dqp->q_res_rtbcount -=
562 (xfs_qcnt_t)qtrx->qt_rtblk_res; 562 (xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35 35
36#ifdef XFS_TRANS_DEBUG
37STATIC void
38xfs_trans_inode_broot_debug(
39 xfs_inode_t *ip);
40#else
41#define xfs_trans_inode_broot_debug(ip)
42#endif
43
44/* 36/*
45 * Add a locked inode to the transaction. 37 * Add a locked inode to the transaction.
46 * 38 *
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
67 * Get a log_item_desc to point at the new item. 59 * Get a log_item_desc to point at the new item.
68 */ 60 */
69 xfs_trans_add_item(tp, &iip->ili_item); 61 xfs_trans_add_item(tp, &iip->ili_item);
70
71 xfs_trans_inode_broot_debug(ip);
72} 62}
73 63
74/* 64/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
135 flags |= ip->i_itemp->ili_last_fields; 125 flags |= ip->i_itemp->ili_last_fields;
136 ip->i_itemp->ili_fields |= flags; 126 ip->i_itemp->ili_fields |= flags;
137} 127}
138
139#ifdef XFS_TRANS_DEBUG
140/*
141 * Keep track of the state of the inode btree root to make sure we
142 * log it properly.
143 */
144STATIC void
145xfs_trans_inode_broot_debug(
146 xfs_inode_t *ip)
147{
148 xfs_inode_log_item_t *iip;
149
150 ASSERT(ip->i_itemp != NULL);
151 iip = ip->i_itemp;
152 if (iip->ili_root_size != 0) {
153 ASSERT(iip->ili_orig_root != NULL);
154 kmem_free(iip->ili_orig_root);
155 iip->ili_root_size = 0;
156 iip->ili_orig_root = NULL;
157 }
158 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
159 ASSERT((ip->i_df.if_broot != NULL) &&
160 (ip->i_df.if_broot_bytes > 0));
161 iip->ili_root_size = ip->i_df.if_broot_bytes;
162 iip->ili_orig_root =
163 (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
164 memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
165 iip->ili_root_size);
166 }
167}
168#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef enum { B_FALSE,B_TRUE } boolean_t;
36typedef __uint32_t prid_t; /* project ID */ 35typedef __uint32_t prid_t; /* project ID */
37typedef __uint32_t inst_t; /* an instruction */ 36typedef __uint32_t inst_t; /* an instruction */
38 37
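With the private boolean_t typedef gone from xfs_types.h, the remaining users here, in xfs_trans_dquot.c above and in xfs_vnodeops.c below, switch to the kernel's native type. A one-line sketch of the replacement, assuming bool/true/false come in via <linux/types.h> as elsewhere in the kernel:

	#include <linux/types.h>	/* bool, true, false */

	bool	locked = false;		/* was: boolean_t locked = B_FALSE; */
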
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
725 int error; 725 int error;
726 xfs_bmap_free_t free_list; 726 xfs_bmap_free_t free_list;
727 xfs_fsblock_t first_block; 727 xfs_fsblock_t first_block;
728 boolean_t unlock_dp_on_error = B_FALSE; 728 bool unlock_dp_on_error = false;
729 uint cancel_flags; 729 uint cancel_flags;
730 int committed; 730 int committed;
731 prid_t prid; 731 prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
794 } 794 }
795 795
796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
797 unlock_dp_on_error = B_TRUE; 797 unlock_dp_on_error = true;
798 798
799 xfs_bmap_init(&free_list, &first_block); 799 xfs_bmap_init(&free_list, &first_block);
800 800
@@ -830,7 +830,7 @@ xfs_create(
830 * error path. 830 * error path.
831 */ 831 */
832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
833 unlock_dp_on_error = B_FALSE; 833 unlock_dp_on_error = false;
834 834
835 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
836 &first_block, &free_list, resblks ? 836 &first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
1367 int pathlen; 1367 int pathlen;
1368 xfs_bmap_free_t free_list; 1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block; 1369 xfs_fsblock_t first_block;
1370 boolean_t unlock_dp_on_error = B_FALSE; 1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags; 1371 uint cancel_flags;
1372 int committed; 1372 int committed;
1373 xfs_fileoff_t first_fsb; 1373 xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
1438 } 1438 }
1439 1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = B_TRUE; 1441 unlock_dp_on_error = true;
1442 1442
1443 /* 1443 /*
1444 * Check whether the directory allows new symlinks or not. 1444 * Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
1484 * error path. 1484 * error path.
1485 */ 1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = B_FALSE; 1487 unlock_dp_on_error = false;
1488 1488
1489 /* 1489 /*
1490 * Also attach the dquot(s) to it, if applicable. 1490 * Also attach the dquot(s) to it, if applicable.