Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig | 10
-rw-r--r--  fs/configfs/configfs_internal.h | 3
-rw-r--r--  fs/configfs/dir.c | 196
-rw-r--r--  fs/configfs/inode.c | 38
-rw-r--r--  fs/dlm/dir.c | 7
-rw-r--r--  fs/dlm/lockspace.c | 17
-rw-r--r--  fs/dlm/lowcomms.c | 22
-rw-r--r--  fs/dlm/lowcomms.h | 3
-rw-r--r--  fs/dlm/member.c | 19
-rw-r--r--  fs/dlm/requestqueue.c | 2
-rw-r--r--  fs/eventfd.c | 3
-rw-r--r--  fs/exofs/common.h | 6
-rw-r--r--  fs/exofs/inode.c | 8
-rw-r--r--  fs/exofs/osd.c | 26
-rw-r--r--  fs/fuse/Makefile | 1
-rw-r--r--  fs/fuse/cuse.c | 610
-rw-r--r--  fs/fuse/dev.c | 15
-rw-r--r--  fs/fuse/dir.c | 33
-rw-r--r--  fs/fuse/file.c | 346
-rw-r--r--  fs/fuse/fuse_i.h | 47
-rw-r--r--  fs/fuse/inode.c | 118
-rw-r--r--  fs/gfs2/Makefile | 1
-rw-r--r--  fs/gfs2/bmap.c | 3
-rw-r--r--  fs/gfs2/glock.c | 12
-rw-r--r--  fs/gfs2/log.c | 9
-rw-r--r--  fs/gfs2/lops.c | 3
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/gfs2/rgrp.c | 11
-rw-r--r--  fs/gfs2/super.c | 4
-rw-r--r--  fs/gfs2/trace_gfs2.h | 407
-rw-r--r--  fs/partitions/check.c | 42
-rw-r--r--  fs/xfs/Kconfig | 1
-rw-r--r--  fs/xfs/Makefile | 5
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 523
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 25
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 53
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_quotaops.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 49
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 479
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 19
-rw-r--r--  fs/xfs/linux-2.6/xfs_xattr.c | 67
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 5
-rw-r--r--  fs/xfs/quota/xfs_dquot.h | 1
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 1
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 168
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 21
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 77
-rw-r--r--  fs/xfs/quota/xfs_qm_stats.c | 1
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 113
-rw-r--r--  fs/xfs/quota/xfs_trans_dquot.c | 66
-rw-r--r--  fs/xfs/xfs_acl.c | 874
-rw-r--r--  fs/xfs/xfs_acl.h | 97
-rw-r--r--  fs/xfs/xfs_ag.h | 2
-rw-r--r--  fs/xfs/xfs_arch.h | 32
-rw-r--r--  fs/xfs/xfs_attr.c | 13
-rw-r--r--  fs/xfs/xfs_bmap.c | 34
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 4
-rw-r--r--  fs/xfs/xfs_filestream.c | 6
-rw-r--r--  fs/xfs/xfs_fs.h | 11
-rw-r--r--  fs/xfs/xfs_iget.c | 8
-rw-r--r--  fs/xfs/xfs_inode.c | 1
-rw-r--r--  fs/xfs/xfs_inode.h | 6
-rw-r--r--  fs/xfs/xfs_iomap.c | 13
-rw-r--r--  fs/xfs/xfs_log_recover.c | 38
-rw-r--r--  fs/xfs/xfs_mount.c | 105
-rw-r--r--  fs/xfs/xfs_mount.h | 84
-rw-r--r--  fs/xfs/xfs_qmops.c | 152
-rw-r--r--  fs/xfs/xfs_quota.h | 129
-rw-r--r--  fs/xfs/xfs_rename.c | 3
-rw-r--r--  fs/xfs/xfs_rw.c | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 15
-rw-r--r--  fs/xfs/xfs_utils.c | 2
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 114
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 1
75 files changed, 2995 insertions(+), 2453 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f36b2a..525da2e8f73b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
+config CUSE
+	tristate "Character device in Userspace support"
+	depends on FUSE_FS
+	help
+	  This FUSE extension allows character devices to be
+	  implemented in userspace.
+
+	  If you want to develop or use a userspace character device
+	  based on CUSE, answer Y or M.
+
 config GENERIC_ACL
 	bool
 	select FS_POSIX_ACL
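
With the option above, a .config fragment enabling CUSE might look like the following sketch (CONFIG_CUSE comes from this hunk; building both as modules is an assumption):

	CONFIG_FUSE_FS=m
	CONFIG_CUSE=m
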
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 762d287123ca..da6061a6df40 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
 	umode_t			s_mode;
 	struct dentry		*s_dentry;
 	struct iattr		*s_iattr;
+#ifdef CONFIG_LOCKDEP
+	int			s_depth;
+#endif
 };
 
 #define CONFIGFS_ROOT		0x0001
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 05373db21a4e..8e48b52205aa 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = {
 	.d_delete	= configfs_d_delete,
 };
 
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Helpers to make lockdep happy with our recursive locking of default groups'
+ * inodes (see configfs_attach_group() and configfs_detach_group()).
+ * We put default groups' i_mutexes in separate classes according to their
+ * depth from the youngest non-default group ancestor.
+ *
+ * For a non-default group A having default groups A/B, A/C, and A/C/D, default
+ * groups A/B and A/C will have their inodes' mutexes in class
+ * default_group_class[0], and default group A/C/D will be in
+ * default_group_class[1].
+ *
+ * The lock classes are declared and assigned in inode.c, according to the
+ * s_depth value.
+ * The s_depth value is initialized to -1, adjusted to >= 0 when attaching
+ * default groups, and reset to -1 when all default groups are attached. During
+ * attachment, if configfs_create() sees s_depth > 0, the lock class of the new
+ * inode's mutex is set to default_group_class[s_depth - 1].
+ */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+	sd->s_depth = -1;
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+					  struct configfs_dirent *sd)
+{
+	int parent_depth = parent_sd->s_depth;
+
+	if (parent_depth >= 0)
+		sd->s_depth = parent_depth + 1;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+	/*
+	 * The item's i_mutex class is already set up, so s_depth is now only
+	 * used to set new sub-directories' s_depth, which is always done
+	 * with the item's i_mutex locked.
+	 */
+	/*
+	 * sd->s_depth == -1 iff we are a non-default group.
+	 * Else (we are a default group) sd->s_depth > 0 (see
+	 * create_dir()).
+	 */
+	if (sd->s_depth == -1)
+		/*
+		 * We are a non-default group and we are going to create
+		 * default groups.
+		 */
+		sd->s_depth = 0;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+	/* We will not create default groups anymore. */
+	sd->s_depth = -1;
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+					  struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
 /*
  * Allocates a new configfs_dirent and links it to the parent configfs_dirent
  */
-static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd,
-						   void * element)
+static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
+						   void *element, int type)
 {
 	struct configfs_dirent * sd;
 
@@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	INIT_LIST_HEAD(&sd->s_links);
 	INIT_LIST_HEAD(&sd->s_children);
 	sd->s_element = element;
+	sd->s_type = type;
+	configfs_init_dirent_depth(sd);
 	spin_lock(&configfs_dirent_lock);
 	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
 		spin_unlock(&configfs_dirent_lock);
@@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
 {
 	struct configfs_dirent * sd;
 
-	sd = configfs_new_dirent(parent_sd, element);
+	sd = configfs_new_dirent(parent_sd, element, type);
 	if (IS_ERR(sd))
 		return PTR_ERR(sd);
 
 	sd->s_mode = mode;
-	sd->s_type = type;
 	sd->s_dentry = dentry;
 	if (dentry) {
 		dentry->d_fsdata = configfs_get(sd);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
 				     CONFIGFS_DIR | CONFIGFS_USET_CREATING);
 	if (!error) {
+		configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
 			inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
 	 * error, as rmdir() would.
 	 */
 	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	configfs_adjust_dir_dirent_depth_before_populate(sd);
 	ret = populate_groups(to_config_group(item));
 	if (ret) {
 		configfs_detach_item(item);
 		dentry->d_inode->i_flags |= S_DEAD;
 	}
+	configfs_adjust_dir_dirent_depth_after_populate(sd);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 	if (ret)
 		d_delete(dentry);
@@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
  * Note, btw, that this can be called at *any* time, even when a configfs
  * subsystem isn't registered, or when configfs is loading or unloading.
  * Just like configfs_register_subsystem().  So we take the same
- * precautions.  We pin the filesystem.  We lock each i_mutex _in_order_
- * on our way down the tree.  If we can find the target item in the
+ * precautions.  We pin the filesystem.  We lock configfs_dirent_lock.
+ * If we can find the target item in the
  * configfs tree, it must be part of the subsystem tree as well, so we
- * do not need the subsystem semaphore.  Holding the i_mutex chain locks
- * out mkdir() and rmdir(), who might be racing us.
+ * do not need the subsystem semaphore.  Holding configfs_dirent_lock helps
+ * lock out mkdir() and rmdir(), which might be racing us.
  */
 
 /*
@@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
  * do that so we can unlock it if we find nothing.
  *
  * Here we do a depth-first search of the dentry hierarchy looking for
- * our object.  We take i_mutex on each step of the way down.  IT IS
- * ESSENTIAL THAT i_mutex LOCKING IS ORDERED.  If we come back up a branch,
- * we'll drop the i_mutex.
+ * our object.
+ * We deliberately ignore items tagged as dropping since they are virtually
+ * dead, as well as items in the middle of attachment since they virtually
+ * do not exist yet.  This completes the locking out of racing mkdir() and
+ * rmdir().
+ * Note: subdirectories in the middle of attachment start with s_type =
+ * CONFIGFS_DIR|CONFIGFS_USET_CREATING, set by create_dir().  When
+ * CONFIGFS_USET_CREATING is set, we ignore the item.  s_type is actually
+ * set in configfs_new_dirent(), which runs under configfs_dirent_lock.
  *
- * If the target is not found, -ENOENT is bubbled up and we have released
- * all locks.  If the target was found, the locks will be cleared by
- * configfs_depend_rollback().
+ * If the target is not found, -ENOENT is bubbled up.
  *
  * This adds a requirement that all config_items be unique!
  *
- * This is recursive because the locking traversal is tricky.  There isn't
+ * This is recursive.  There isn't
  * much on the stack, though, so folks that need this function - be careful
  * about your stack!  Patches will be accepted to make it iterative.
  */
@@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin,
 
 	BUG_ON(!origin || !sd);
 
-	/* Lock this guy on the way down */
-	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
 	if (sd->s_element == target)	/* Boo-yah */
 		goto out;
 
 	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
-		if (child_sd->s_type & CONFIGFS_DIR) {
+		if ((child_sd->s_type & CONFIGFS_DIR) &&
+		    !(child_sd->s_type & CONFIGFS_USET_DROPPING) &&
+		    !(child_sd->s_type & CONFIGFS_USET_CREATING)) {
 			ret = configfs_depend_prep(child_sd->s_dentry,
 						   target);
 			if (!ret)
@@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin,
 	}
 
 	/* We looped all our children and didn't find target */
-	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
 	ret = -ENOENT;
 
 out:
 	return ret;
 }
 
-/*
- * This is ONLY called if configfs_depend_prep() did its job.  So we can
- * trust the entire path from item back up to origin.
- *
- * We walk backwards from item, unlocking each i_mutex.  We finish by
- * unlocking origin.
- */
-static void configfs_depend_rollback(struct dentry *origin,
-				     struct config_item *item)
-{
-	struct dentry *dentry = item->ci_dentry;
-
-	while (dentry != origin) {
-		mutex_unlock(&dentry->d_inode->i_mutex);
-		dentry = dentry->d_parent;
-	}
-
-	mutex_unlock(&origin->d_inode->i_mutex);
-}
-
 int configfs_depend_item(struct configfs_subsystem *subsys,
 			 struct config_item *target)
 {
@@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
 
 	/* Ok, now we can trust subsys/s_item */
 
-	/* Scan the tree, locking i_mutex recursively, return 0 if found */
+	spin_lock(&configfs_dirent_lock);
+	/* Scan the tree, return 0 if found */
 	ret = configfs_depend_prep(subsys_sd->s_dentry, target);
 	if (ret)
-		goto out_unlock_fs;
+		goto out_unlock_dirent_lock;
 
-	/* We hold all i_mutexes from the subsystem down to the target */
+	/*
+	 * We are sure that the item is not about to be removed by rmdir(), and
+	 * not in the middle of attachment by mkdir().
+	 */
 	p = target->ci_dentry->d_fsdata;
 	p->s_dependent_count += 1;
 
-	configfs_depend_rollback(subsys_sd->s_dentry, target);
-
+out_unlock_dirent_lock:
+	spin_unlock(&configfs_dirent_lock);
 out_unlock_fs:
 	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
 
@@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 	struct configfs_dirent *sd;
 
 	/*
-	 * Since we can trust everything is pinned, we just need i_mutex
-	 * on the item.
+	 * Since we can trust everything is pinned, we just need
+	 * configfs_dirent_lock.
 	 */
-	mutex_lock(&target->ci_dentry->d_inode->i_mutex);
+	spin_lock(&configfs_dirent_lock);
 
 	sd = target->ci_dentry->d_fsdata;
 	BUG_ON(sd->s_dependent_count < 1);
@@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 	 * After this unlock, we cannot trust the item to stay alive!
 	 * DO NOT REFERENCE item after this unlock.
 	 */
-	mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
+	spin_unlock(&configfs_dirent_lock);
 }
 EXPORT_SYMBOL(configfs_undepend_item);
 
@@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
 		return -EPERM;
 
-	/*
-	 * Here's where we check for dependents.  We're protected by
-	 * i_mutex.
-	 */
-	if (sd->s_dependent_count)
-		return -EBUSY;
-
 	/* Get a working ref until we have the child */
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	subsys = to_config_group(parent_item)->cg_subsys;
@@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	mutex_lock(&configfs_symlink_mutex);
 	spin_lock(&configfs_dirent_lock);
-	ret = configfs_detach_prep(dentry, &wait_mutex);
-	if (ret)
-		configfs_detach_rollback(dentry);
+	/*
+	 * Here's where we check for dependents.  We're protected by
+	 * configfs_dirent_lock.
+	 * If no dependent exists, atomically tag the item as dropping.
+	 */
+	ret = sd->s_dependent_count ? -EBUSY : 0;
+	if (!ret) {
+		ret = configfs_detach_prep(dentry, &wait_mutex);
+		if (ret)
+			configfs_detach_rollback(dentry);
+	}
 	spin_unlock(&configfs_dirent_lock);
 	mutex_unlock(&configfs_symlink_mutex);
 
@@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 	 */
 	err = -ENOENT;
 	if (configfs_dirent_is_ready(parent_sd)) {
-		file->private_data = configfs_new_dirent(parent_sd, NULL);
+		file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
 		if (IS_ERR(file->private_data))
 			err = PTR_ERR(file->private_data);
 		else
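
The rework above drops the recursive i_mutex chain in favor of configfs_dirent_lock, but the caller-facing contract of configfs_depend_item()/configfs_undepend_item() is unchanged. A hedged sketch of a hypothetical in-kernel user (my_subsys, item, and use_item() are placeholders, not from this patch):

	/* Pin the item: from here on, rmdir() on it returns -EBUSY. */
	ret = configfs_depend_item(&my_subsys, item);
	if (ret)
		return ret;	/* not found, dropping, or mid-attach */

	use_item(item);

	/* Drop the pin; the item may be removed again afterwards. */
	configfs_undepend_item(&my_subsys, item);
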
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5d349d38e056..4921e7426d95 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include <linux/sched.h>
+#include <linux/lockdep.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
 
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
+#endif
+
 extern struct super_block * configfs_sb;
 
 static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 	return inode;
 }
 
+#ifdef CONFIG_LOCKDEP
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+					  struct inode *inode)
+{
+	int depth = sd->s_depth;
+
+	if (depth > 0) {
+		if (depth <= ARRAY_SIZE(default_group_class)) {
+			lockdep_set_class(&inode->i_mutex,
+					  &default_group_class[depth - 1]);
+		} else {
+			/*
+			 * In practice the maximum level of locking depth is
+			 * already reached.  Just inform about possible reasons.
+			 */
+			printk(KERN_INFO "configfs: Too many levels of inodes"
+			       " for the locking correctness validator.\n");
+			printk(KERN_INFO "Spurious warnings may appear.\n");
+		}
+	}
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+					  struct inode *inode)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
 int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 {
 	int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
 		struct inode *p_inode = dentry->d_parent->d_inode;
 		p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
 	}
+	configfs_set_inode_lock_class(sd, inode);
 	goto Proceed;
 }
 else
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 858fba14aaa6..c4dfa1dcc86f 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
 		spin_unlock(&ls->ls_recover_list_lock);
 
 	if (!found)
-		de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
+		de = kzalloc(sizeof(struct dlm_direntry) + len,
+			     ls->ls_allocation);
 	return de;
 }
 
@@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
 
 	dlm_dir_clear(ls);
 
-	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
+	last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
 	if (!last_name)
 		goto out;
 
@@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
 	if (namelen > DLM_RESNAME_MAXLEN)
 		return -EINVAL;
 
-	de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
+	de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
 	if (!de)
 		return -ENOMEM;
 
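
The fs/dlm allocation changes in this series all follow the pattern just shown: replace GFP_KERNEL with the lockspace's stored GFP mode. A hedged sketch of the rationale (GFP_NOFS as the value of ls_allocation for filesystem lockspaces is an assumption, not stated in this hunk):

	/* With ls->ls_allocation == GFP_NOFS, memory reclaim triggered by
	 * this allocation cannot re-enter the filesystem and deadlock on
	 * DLM locks the caller already holds. */
	de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
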
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df3c295..d489fcc86713 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@ static void threads_stop(void)
 	dlm_astd_stop();
 }
 
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
 			 uint32_t flags, int lvblen)
 {
 	struct dlm_ls *ls;
@@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 			break;
 		}
 		ls->ls_create_count++;
-		module_put(THIS_MODULE);
-		error = 1; /* not an error, return 0 */
+		*lockspace = ls;
+		error = 1;
 		break;
 	}
 	spin_unlock(&lslist_lock);
 
-	if (error < 0)
-		goto out;
 	if (error)
-		goto ret_zero;
+		goto out;
 
 	error = -ENOMEM;
 
@@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	dlm_create_debug_file(ls);
 
 	log_debug(ls, "join complete");
- ret_zero:
 	*lockspace = ls;
 	return 0;
 
@@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	return error;
 }
 
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 		      uint32_t flags, int lvblen)
 {
 	int error = 0;
@@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
-	else if (!ls_count)
+	if (error > 0)
+		error = 0;
+	if (!ls_count)
 		threads_stop();
 out:
 	mutex_unlock(&ls_lock);
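
With the constified signature, callers can now pass string literals directly. A hedged caller sketch (lockspace name, flags, and LVB length are made-up values):

	void *lockspace;
	int error;

	error = dlm_new_lockspace("example", strlen("example"),
				  &lockspace, 0, 32);
	if (error)
		return error;	/* error > 0 is already folded to 0 above */
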
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609108a83267..cdb580a9c7a2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk)
 		lowcomms_write_space(sk);
 }
 
+int dlm_lowcomms_connect_node(int nodeid)
+{
+	struct connection *con;
+
+	if (nodeid == dlm_our_nodeid())
+		return 0;
+
+	con = nodeid2con(nodeid, GFP_NOFS);
+	if (!con)
+		return -ENOMEM;
+	lowcomms_connect_sock(con);
+	return 0;
+}
+
 /* Make a socket active */
 static int add_sock(struct socket *sock, struct connection *con)
@@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con,
 		return;
 	}
 
-	new_con = nodeid2con(nodeid, GFP_KERNEL);
+	new_con = nodeid2con(nodeid, GFP_NOFS);
 	if (!new_con)
 		return;
 
@@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con)
 	 * the same time and the connections cross on the wire.
 	 * In this case we store the incoming one in "othercon"
 	 */
-	newcon = nodeid2con(nodeid, GFP_KERNEL);
+	newcon = nodeid2con(nodeid, GFP_NOFS);
 	if (!newcon) {
 		result = -ENOMEM;
 		goto accept_err;
@@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con)
 		struct connection *othercon = newcon->othercon;
 
 		if (!othercon) {
-			othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
 			if (!othercon) {
 				log_print("failed to allocate incoming socket");
 				mutex_unlock(&newcon->sock_mutex);
@@ -1421,7 +1435,7 @@ static int work_start(void)
 static void stop_conn(struct connection *con)
 {
 	con->flags |= 0x0F;
-	if (con->sock)
+	if (con->sock && con->sock->sk)
 		con->sock->sk->sk_user_data = NULL;
 }
 
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a9a9618c0d3f..1311e6426287 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void);
 int dlm_lowcomms_close(int nodeid);
 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
 void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 26133f05ae3a..b128775913b2 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include "recover.h"
 #include "rcom.h"
 #include "config.h"
+#include "lowcomms.h"
 
 static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 {
@@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 {
 	struct dlm_member *memb;
-	int w;
+	int w, error;
 
-	memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+	memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 	if (!memb)
 		return -ENOMEM;
 
@@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 		return w;
 	}
 
+	error = dlm_lowcomms_connect_node(nodeid);
+	if (error < 0) {
+		kfree(memb);
+		return error;
+	}
+
 	memb->nodeid = nodeid;
 	memb->weight = w;
 	add_ordered_member(ls, memb);
@@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
 
 	ls->ls_total_weight = total;
 
-	array = kmalloc(sizeof(int) * total, GFP_KERNEL);
+	array = kmalloc(sizeof(int) * total, ls->ls_allocation);
 	if (!array)
 		return;
 
@@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 			continue;
 		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
 
-		memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+		memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 		if (!memb)
 			return -ENOMEM;
 		memb->nodeid = rv->new[i];
@@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
 	int *ids = NULL, *new = NULL;
 	int error, ids_count = 0, new_count = 0;
 
-	rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
+	rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
 	if (!rv)
 		return -ENOMEM;
 
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index daa4183fbb84..7a2307c08911 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
 	struct rq_entry *e;
 	int length = ms->m_header.h_length - sizeof(struct dlm_message);
 
-	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
+	e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory len %d", length);
 		return;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d593d35..3f0e1974abdc 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/eventfd.h>
 #include <linux/syscalls.h>
+#include <linux/module.h>
 
 struct eventfd_ctx {
 	wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
 	return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
 	return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
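
The new exports let kernel modules signal an eventfd handed in from userspace by file descriptor. A hedged sketch of the pattern the exports enable (efd is a hypothetical fd received from userspace; signatures are the ones visible in the hunk headers above):

	struct file *ef = eventfd_fget(efd);

	if (IS_ERR(ef))
		return PTR_ERR(ef);
	eventfd_signal(ef, 1);	/* wake poll()/read() waiters */
	fput(ef);
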
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4bb8c7..24667eedc023 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or,
 
 int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
 
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
 #endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fab4693..77d0a295eb1c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 			struct inode *inode)
 {
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
-	struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
 
 	pcol->sbi = sbi;
-	pcol->req_q = req_q;
+	pcol->req_q = osd_request_queue(sbi->s_dev);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
@@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 		goto err;
 	}
 
-	osd_req_read(or, &obj, pcol->bio, i_start);
+	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
 
 	if (is_sync) {
 		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
-	osd_req_write(or, &obj, pcol_copy->bio, i_start);
+	pcol_copy->bio->bi_rw |= (1 << BIO_RW);	/* FIXME: bio_set_dir() */
+	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
 	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
 	if (unlikely(ret)) {
 		EXOFS_ERR("write_exec: exofs_async_op() failed\n");
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index 06ca92672eb5..b3d2ccb87aaa 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
 
 	return -EIO;
 }
-
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_read(or, obj, bio, offset);
-	return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_write(or, obj, bio, offset);
-	return 0;
-}
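
The deleted helpers were local interim copies; equivalent osd_req_read_kern()/osd_req_write_kern() implementations are assumed to be provided by the shared OSD initiator library from here on (an assumption, not stated in this hunk), so a caller sketch stays shape-compatible with the removed code:

	/* Assumed library-provided replacement with the same signature. */
	ret = osd_req_write_kern(or, &obj, offset, buff, len);
	if (ret)
		return ret;
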
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 72437065f6ad..e95eeb445e58 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 000000000000..de792dcf3274
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009  SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009  Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems.  On initialization, /dev/cuse is
+ * created.  By opening the file and replying to the CUSE_INIT request,
+ * a userland CUSE server can create a character device.  After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn	: contains fuse_conn and serves as bonding structure
+ * channel	: file handle connected to the userland CUSE server
+ * cdev		: the implemented character device
+ * dev		: generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies.  When channel is
+ * closed, everything begins to destruct.  The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN	64
+
+struct cuse_conn {
+	struct list_head	list;	/* linked on cuse_conntbl */
+	struct fuse_conn	fc;	/* fuse connection */
+	struct cdev		*cdev;	/* associated character device */
+	struct device		*dev;	/* device representing @cdev */
+
+	/* init parameters, set once during initialization */
+	bool			unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock);		/* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+	return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+	return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file.  All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	loff_t pos = 0;
+
+	return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	loff_t pos = 0;
+	/*
+	 * No locking or generic_write_checks(), the server is
+	 * responsible for locking and sanity checks.
+	 */
+	return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+	dev_t devt = inode->i_cdev->dev;
+	struct cuse_conn *cc = NULL, *pos;
+	int rc;
+
+	/* look up and get the connection */
+	spin_lock(&cuse_lock);
+	list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+		if (pos->dev->devt == devt) {
+			fuse_conn_get(&pos->fc);
+			cc = pos;
+			break;
+		}
+	spin_unlock(&cuse_lock);
+
+	/* dead? */
+	if (!cc)
+		return -ENODEV;
+
+	/*
+	 * Generic permission check is already done against the chrdev
+	 * file, proceed to open.
+	 */
+	rc = fuse_do_open(&cc->fc, 0, file, 0);
+	if (rc)
+		fuse_conn_put(&cc->fc);
+	return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+
+	fuse_sync_release(ff, file->f_flags);
+	fuse_conn_put(fc);
+
+	return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = 0;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = FUSE_IOCTL_COMPAT;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+	.owner		= THIS_MODULE,
+	.read		= cuse_read,
+	.write		= cuse_write,
+	.open		= cuse_open,
+	.release	= cuse_release,
+	.unlocked_ioctl	= cuse_file_ioctl,
+	.compat_ioctl	= cuse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+	const char	*name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1.  This function parses one pair and sets *@keyp to the
+ * start of the key and *@valp to the start of the value.  Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'.  *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+	char *p = *pp;
+	char *key, *val;
+
+	while (p < end && *p == '\0')
+		p++;
+	if (p == end)
+		return 0;
+
+	if (end[-1] != '\0') {
+		printk(KERN_ERR "CUSE: info not properly terminated\n");
+		return -EINVAL;
+	}
+
+	key = val = p;
+	p += strlen(p);
+
+	if (valp) {
+		strsep(&val, "=");
+		if (!val)
+			val = key + strlen(key);
+		key = strstrip(key);
+		val = strstrip(val);
+	} else
+		key = strstrip(key);
+
+	if (!strlen(key)) {
+		printk(KERN_ERR "CUSE: zero-length info key specified\n");
+		return -EINVAL;
+	}
+
+	*pp = p;
+	*keyp = key;
+	if (valp)
+		*valp = val;
+
+	return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo.  The string
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * it, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+	char *end = p + len;
+	char *key, *val;
+	int rc;
+
+	while (true) {
+		rc = cuse_parse_one(&p, end, &key, &val);
+		if (rc < 0)
+			return rc;
+		if (!rc)
+			break;
+		if (strcmp(key, "DEVNAME") == 0)
+			devinfo->name = val;
+		else
+			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+			       key);
+	}
+
+	if (!devinfo->name || !strlen(devinfo->name)) {
+		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it.  Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	struct cuse_init_out *arg = &req->misc.cuse_init_out;
+	struct page *page = req->pages[0];
+	struct cuse_devinfo devinfo = { };
+	struct device *dev;
+	struct cdev *cdev;
+	dev_t devt;
+	int rc;
+
+	if (req->out.h.error ||
+	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+		goto err;
+	}
+
+	fc->minor = arg->minor;
+	fc->max_read = max_t(unsigned, arg->max_read, 4096);
+	fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+	/* parse init reply */
+	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+				&devinfo);
+	if (rc)
+		goto err;
+
+	/* determine and reserve devt */
+	devt = MKDEV(arg->dev_major, arg->dev_minor);
+	if (!MAJOR(devt))
+		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+	else
+		rc = register_chrdev_region(devt, 1, devinfo.name);
+	if (rc) {
+		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+		goto err;
+	}
+
+	/* devt determined, create device */
+	rc = -ENOMEM;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto err_region;
+
+	device_initialize(dev);
+	dev_set_uevent_suppress(dev, 1);
+	dev->class = cuse_class;
+	dev->devt = devt;
+	dev->release = cuse_gendev_release;
+	dev_set_drvdata(dev, cc);
+	dev_set_name(dev, "%s", devinfo.name);
+
+	rc = device_add(dev);
+	if (rc)
+		goto err_device;
+
+	/* register cdev */
+	rc = -ENOMEM;
+	cdev = cdev_alloc();
+	if (!cdev)
+		goto err_device;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &cuse_frontend_fops;
+
+	rc = cdev_add(cdev, devt, 1);
+	if (rc)
+		goto err_cdev;
+
+	cc->dev = dev;
+	cc->cdev = cdev;
+
+	/* make the device available */
+	spin_lock(&cuse_lock);
+	list_add(&cc->list, cuse_conntbl_head(devt));
+	spin_unlock(&cuse_lock);
+
+	/* announce device availability */
+	dev_set_uevent_suppress(dev, 0);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+	__free_page(page);
+	return;
+
+err_cdev:
+	cdev_del(cdev);
+err_device:
+	put_device(dev);
+err_region:
+	unregister_chrdev_region(devt, 1);
+err:
+	fc->conn_error = 1;
+	goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+	int rc;
+	struct fuse_req *req;
+	struct page *page;
+	struct fuse_conn *fc = &cc->fc;
+	struct cuse_init_in *arg;
+
+	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		rc = PTR_ERR(req);
+		goto err;
+	}
+
+	rc = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto err_put_req;
+
+	arg = &req->misc.cuse_init_in;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+	req->in.h.opcode = CUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct cuse_init_in);
+	req->in.args[0].value = arg;
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(struct cuse_init_out);
+	req->out.args[0].value = &req->misc.cuse_init_out;
+	req->out.args[1].size = CUSE_INIT_INFO_MAX;
+	req->out.argvar = 1;
+	req->out.argpages = 1;
+	req->pages[0] = page;
+	req->num_pages = 1;
+	req->end = cuse_process_init_reply;
+	fuse_request_send_background(fc, req);
+
+	return 0;
+
+err_put_req:
+	fuse_put_request(fc, req);
+err:
+	return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * A userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initialization request the kernel sends.  This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init.  The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc;
+	int rc;
+
+	/* set up cuse_conn */
+	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+	if (!cc)
+		return -ENOMEM;
+
+	fuse_conn_init(&cc->fc);
+
+	INIT_LIST_HEAD(&cc->list);
+	cc->fc.release = cuse_fc_release;
+
+	cc->fc.connected = 1;
+	cc->fc.blocked = 0;
+	rc = cuse_send_init(cc);
+	if (rc) {
+		fuse_conn_put(&cc->fc);
+		return rc;
+	}
+	file->private_data = &cc->fc;	/* channel owns base reference to cc */
+
+	return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc = fc_to_cc(file->private_data);
+	int rc;
+
+	/* remove from the conntbl, no more access from this point on */
+	spin_lock(&cuse_lock);
+	list_del_init(&cc->list);
+	spin_unlock(&cuse_lock);
+
+	/* remove device */
+	if (cc->dev)
+		device_unregister(cc->dev);
+	if (cc->cdev) {
+		unregister_chrdev_region(cc->cdev->dev, 1);
+		cdev_del(cc->cdev);
+	}
+
+	/* kill connection and shutdown channel */
+	fuse_conn_kill(&cc->fc);
+	rc = fuse_dev_release(inode, file);	/* puts the base reference */
+
+	return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initialization
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	fuse_abort_conn(&cc->fc);
+	return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+	__ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+	__ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+	{ }
+};
+
+static struct miscdevice cuse_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "cuse",
+	.fops		= &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+	int i, rc;
+
+	/* init conntbl */
+	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+		INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+	/* inherit and extend fuse_dev_operations */
+	cuse_channel_fops		= fuse_dev_operations;
+	cuse_channel_fops.owner		= THIS_MODULE;
+	cuse_channel_fops.open		= cuse_channel_open;
+	cuse_channel_fops.release	= cuse_channel_release;
+
+	cuse_class = class_create(THIS_MODULE, "cuse");
+	if (IS_ERR(cuse_class))
+		return PTR_ERR(cuse_class);
+
+	cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+	rc = misc_register(&cuse_miscdev);
+	if (rc) {
+		class_destroy(cuse_class);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+	misc_deregister(&cuse_miscdev);
+	class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
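
To make the kernel-side handshake above concrete, here is a hedged userspace sketch of its counterpart: a minimal CUSE server built on libfuse's cuse_lowlevel API. The device name, payload, and version macro are illustrative assumptions, not part of this patch; libfuse's example/cusexmp.c is the canonical reference.

	/* cuse_demo.c - hypothetical minimal CUSE server (build against libfuse) */
	#define FUSE_USE_VERSION 29
	#include <cuse_lowlevel.h>
	#include <string.h>

	static void demo_open(fuse_req_t req, struct fuse_file_info *fi)
	{
		fuse_reply_open(req, fi);	/* accept every open */
	}

	static void demo_read(fuse_req_t req, size_t size, off_t off,
			      struct fuse_file_info *fi)
	{
		static const char msg[] = "hello from CUSE\n";

		/* reply with at most 'size' bytes; offset is ignored here */
		fuse_reply_buf(req, msg, size < sizeof(msg) ? size : sizeof(msg));
	}

	static const struct cuse_lowlevel_ops demo_clop = {
		.open	= demo_open,
		.read	= demo_read,
	};

	int main(int argc, char **argv)
	{
		/* DEVNAME=... becomes the packed device-info string parsed by
		 * cuse_parse_devinfo() in the kernel code above. */
		const char *dev_info_argv[] = { "DEVNAME=cuse-demo" };
		struct cuse_info ci;

		memset(&ci, 0, sizeof(ci));
		ci.dev_info_argc = 1;
		ci.dev_info_argv = dev_info_argv;

		return cuse_lowlevel_main(argc, argv, &ci, &demo_clop, NULL);
	}
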
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba76b68c52ff..8fed2ed12f38 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void)
 	fuse_request_init(req);
 	return req;
 }
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
 struct fuse_req *fuse_request_alloc_nofs(void)
 {
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 		atomic_dec(&fc->num_waiting);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(fuse_get_req);
 
 /*
  * Return request in fuse_file->reserved_req. However that may
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 		fuse_request_free(req);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_put_request);
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 {
@@ -282,7 +285,7 @@ __releases(&fc->lock)
 		wake_up_all(&fc->blocked_waitq);
 	}
 	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
-	    fc->connected) {
+	    fc->connected && fc->bdi_initialized) {
 		clear_bdi_congested(&fc->bdi, READ);
 		clear_bdi_congested(&fc->bdi, WRITE);
 	}
@@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send);
 
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
@@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
408 fc->num_background++; 412 fc->num_background++;
409 if (fc->num_background == FUSE_MAX_BACKGROUND) 413 if (fc->num_background == FUSE_MAX_BACKGROUND)
410 fc->blocked = 1; 414 fc->blocked = 1;
411 if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { 415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
416 fc->bdi_initialized) {
412 set_bdi_congested(&fc->bdi, READ); 417 set_bdi_congested(&fc->bdi, READ);
413 set_bdi_congested(&fc->bdi, WRITE); 418 set_bdi_congested(&fc->bdi, WRITE);
414 } 419 }
@@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
439 req->isreply = 1; 444 req->isreply = 1;
440 fuse_request_send_nowait(fc, req); 445 fuse_request_send_nowait(fc, req);
441} 446}
447EXPORT_SYMBOL_GPL(fuse_request_send_background);
442 448
443/* 449/*
444 * Called under fc->lock 450 * Called under fc->lock
@@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
1105 } 1111 }
1106 spin_unlock(&fc->lock); 1112 spin_unlock(&fc->lock);
1107} 1113}
1114EXPORT_SYMBOL_GPL(fuse_abort_conn);
1108 1115
1109static int fuse_dev_release(struct inode *inode, struct file *file) 1116int fuse_dev_release(struct inode *inode, struct file *file)
1110{ 1117{
1111 struct fuse_conn *fc = fuse_get_conn(file); 1118 struct fuse_conn *fc = fuse_get_conn(file);
1112 if (fc) { 1119 if (fc) {
@@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
1120 1127
1121 return 0; 1128 return 0;
1122} 1129}
1130EXPORT_SYMBOL_GPL(fuse_dev_release);
1123 1131
1124static int fuse_dev_fasync(int fd, struct file *file, int on) 1132static int fuse_dev_fasync(int fd, struct file *file, int on)
1125{ 1133{
@@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = {
1142 .release = fuse_dev_release, 1150 .release = fuse_dev_release,
1143 .fasync = fuse_dev_fasync, 1151 .fasync = fuse_dev_fasync,
1144}; 1152};
1153EXPORT_SYMBOL_GPL(fuse_dev_operations);
1145 1154
1146static struct miscdevice fuse_miscdevice = { 1155static struct miscdevice fuse_miscdevice = {
1147 .minor = FUSE_MINOR, 1156 .minor = FUSE_MINOR,
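
Two threads run through the dev.c hunks: the request primitives (fuse_request_alloc(), fuse_get_req(), fuse_put_request(), fuse_request_send(), fuse_request_send_background(), fuse_abort_conn(), fuse_dev_release() and fuse_dev_operations) become GPL exports so a separate module such as CUSE can drive a connection, and the congestion calls are gated on the new fc->bdi_initialized bit, since a CUSE connection never registers a backing_dev_info. A sketch of the synchronous round trip the exports allow from outside dev.c, assuming fuse_i.h is in scope; the opcode and the example_ name are illustrative:

static int example_sync_request(struct fuse_conn *fc)
{
	struct fuse_req *req;
	int err;

	req = fuse_get_req(fc);		/* may sleep while fc->blocked */
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->in.h.opcode = FUSE_STATFS;	/* any opcode the server handles */
	fuse_request_send(fc, req);	/* blocks until userspace replies */
	err = req->out.h.error;
	fuse_put_request(fc, req);
	return err;
}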
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8b8eebc5614b..b3089a083d30 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
362} 362}
363 363
364/* 364/*
365 * Synchronous release for the case when something goes wrong in CREATE_OPEN
366 */
367static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
368 u64 nodeid, int flags)
369{
370 fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
371 ff->reserved_req->force = 1;
372 fuse_request_send(fc, ff->reserved_req);
373 fuse_put_request(fc, ff->reserved_req);
374 kfree(ff);
375}
376
377/*
378 * Atomic create+open operation 365 * Atomic create+open operation
379 * 366 *
380 * If the filesystem doesn't support this, then fall back to separate 367 * If the filesystem doesn't support this, then fall back to separate
@@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
445 goto out_free_ff; 432 goto out_free_ff;
446 433
447 fuse_put_request(fc, req); 434 fuse_put_request(fc, req);
435 ff->fh = outopen.fh;
436 ff->nodeid = outentry.nodeid;
437 ff->open_flags = outopen.open_flags;
448 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 438 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
449 &outentry.attr, entry_attr_timeout(&outentry), 0); 439 &outentry.attr, entry_attr_timeout(&outentry), 0);
450 if (!inode) { 440 if (!inode) {
451 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 441 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
452 ff->fh = outopen.fh; 442 fuse_sync_release(ff, flags);
453 fuse_sync_release(fc, ff, outentry.nodeid, flags);
454 fuse_send_forget(fc, forget_req, outentry.nodeid, 1); 443 fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
455 return -ENOMEM; 444 return -ENOMEM;
456 } 445 }
@@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
460 fuse_invalidate_attr(dir); 449 fuse_invalidate_attr(dir);
461 file = lookup_instantiate_filp(nd, entry, generic_file_open); 450 file = lookup_instantiate_filp(nd, entry, generic_file_open);
462 if (IS_ERR(file)) { 451 if (IS_ERR(file)) {
463 ff->fh = outopen.fh; 452 fuse_sync_release(ff, flags);
464 fuse_sync_release(fc, ff, outentry.nodeid, flags);
465 return PTR_ERR(file); 453 return PTR_ERR(file);
466 } 454 }
467 fuse_finish_open(inode, file, ff, &outopen); 455 file->private_data = fuse_file_get(ff);
456 fuse_finish_open(inode, file);
468 return 0; 457 return 0;
469 458
470 out_free_ff: 459 out_free_ff:
@@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1035 req->out.argpages = 1; 1024 req->out.argpages = 1;
1036 req->num_pages = 1; 1025 req->num_pages = 1;
1037 req->pages[0] = page; 1026 req->pages[0] = page;
1038 fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1027 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
1039 fuse_request_send(fc, req); 1028 fuse_request_send(fc, req);
1040 nbytes = req->out.args[0].size; 1029 nbytes = req->out.args[0].size;
1041 err = req->out.h.error; 1030 err = req->out.h.error;
@@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
1101 1090
1102static int fuse_dir_open(struct inode *inode, struct file *file) 1091static int fuse_dir_open(struct inode *inode, struct file *file)
1103{ 1092{
1104 return fuse_open_common(inode, file, 1); 1093 return fuse_open_common(inode, file, true);
1105} 1094}
1106 1095
1107static int fuse_dir_release(struct inode *inode, struct file *file) 1096static int fuse_dir_release(struct inode *inode, struct file *file)
1108{ 1097{
1109 return fuse_release_common(inode, file, 1); 1098 fuse_release_common(file, FUSE_RELEASEDIR);
1099
1100 return 0;
1110} 1101}
1111 1102
1112static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) 1103static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
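
fuse_sync_release() leaves dir.c because, after the file.c rework below, a struct fuse_file records its own connection, nodeid and open flags as soon as CREATE_OPEN succeeds. The error paths shrink accordingly: a failed fuse_iget() or lookup_instantiate_filp() only needs the ff to force a RELEASE back to the server. Roughly, the new unwind looks like this (condensed from the hunk above; flags has already had O_CREAT|O_EXCL|O_TRUNC masked off):

	if (!inode) {
		fuse_sync_release(ff, flags);	/* ff carries fc and nodeid */
		fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
		return -ENOMEM;
	}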
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06f30e965676..fce6ce694fde 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,13 +12,13 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/module.h>
15 16
16static const struct file_operations fuse_direct_io_file_operations; 17static const struct file_operations fuse_direct_io_file_operations;
17 18
18static int fuse_send_open(struct inode *inode, struct file *file, int isdir, 19static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
19 struct fuse_open_out *outargp) 20 int opcode, struct fuse_open_out *outargp)
20{ 21{
21 struct fuse_conn *fc = get_fuse_conn(inode);
22 struct fuse_open_in inarg; 22 struct fuse_open_in inarg;
23 struct fuse_req *req; 23 struct fuse_req *req;
24 int err; 24 int err;
@@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
31 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 31 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
32 if (!fc->atomic_o_trunc) 32 if (!fc->atomic_o_trunc)
33 inarg.flags &= ~O_TRUNC; 33 inarg.flags &= ~O_TRUNC;
34 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; 34 req->in.h.opcode = opcode;
35 req->in.h.nodeid = get_node_id(inode); 35 req->in.h.nodeid = nodeid;
36 req->in.numargs = 1; 36 req->in.numargs = 1;
37 req->in.args[0].size = sizeof(inarg); 37 req->in.args[0].size = sizeof(inarg);
38 req->in.args[0].value = &inarg; 38 req->in.args[0].value = &inarg;
@@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
49struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) 49struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
50{ 50{
51 struct fuse_file *ff; 51 struct fuse_file *ff;
52
52 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); 53 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
53 if (ff) { 54 if (unlikely(!ff))
54 ff->reserved_req = fuse_request_alloc(); 55 return NULL;
55 if (!ff->reserved_req) { 56
56 kfree(ff); 57 ff->fc = fc;
57 return NULL; 58 ff->reserved_req = fuse_request_alloc();
58 } else { 59 if (unlikely(!ff->reserved_req)) {
59 INIT_LIST_HEAD(&ff->write_entry); 60 kfree(ff);
60 atomic_set(&ff->count, 0); 61 return NULL;
61 spin_lock(&fc->lock);
62 ff->kh = ++fc->khctr;
63 spin_unlock(&fc->lock);
64 }
65 RB_CLEAR_NODE(&ff->polled_node);
66 init_waitqueue_head(&ff->poll_wait);
67 } 62 }
63
64 INIT_LIST_HEAD(&ff->write_entry);
65 atomic_set(&ff->count, 0);
66 RB_CLEAR_NODE(&ff->polled_node);
67 init_waitqueue_head(&ff->poll_wait);
68
69 spin_lock(&fc->lock);
70 ff->kh = ++fc->khctr;
71 spin_unlock(&fc->lock);
72
68 return ff; 73 return ff;
69} 74}
70 75
@@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff)
74 kfree(ff); 79 kfree(ff);
75} 80}
76 81
77static struct fuse_file *fuse_file_get(struct fuse_file *ff) 82struct fuse_file *fuse_file_get(struct fuse_file *ff)
78{ 83{
79 atomic_inc(&ff->count); 84 atomic_inc(&ff->count);
80 return ff; 85 return ff;
@@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
82 87
83static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 88static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
84{ 89{
85 dput(req->misc.release.dentry); 90 path_put(&req->misc.release.path);
86 mntput(req->misc.release.vfsmount);
87} 91}
88 92
89static void fuse_file_put(struct fuse_file *ff) 93static void fuse_file_put(struct fuse_file *ff)
90{ 94{
91 if (atomic_dec_and_test(&ff->count)) { 95 if (atomic_dec_and_test(&ff->count)) {
92 struct fuse_req *req = ff->reserved_req; 96 struct fuse_req *req = ff->reserved_req;
93 struct inode *inode = req->misc.release.dentry->d_inode; 97
94 struct fuse_conn *fc = get_fuse_conn(inode);
95 req->end = fuse_release_end; 98 req->end = fuse_release_end;
96 fuse_request_send_background(fc, req); 99 fuse_request_send_background(ff->fc, req);
97 kfree(ff); 100 kfree(ff);
98 } 101 }
99} 102}
100 103
101void fuse_finish_open(struct inode *inode, struct file *file, 104int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
102 struct fuse_file *ff, struct fuse_open_out *outarg) 105 bool isdir)
103{ 106{
104 if (outarg->open_flags & FOPEN_DIRECT_IO) 107 struct fuse_open_out outarg;
108 struct fuse_file *ff;
109 int err;
110 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
111
112 ff = fuse_file_alloc(fc);
113 if (!ff)
114 return -ENOMEM;
115
116 err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
117 if (err) {
118 fuse_file_free(ff);
119 return err;
120 }
121
122 if (isdir)
123 outarg.open_flags &= ~FOPEN_DIRECT_IO;
124
125 ff->fh = outarg.fh;
126 ff->nodeid = nodeid;
127 ff->open_flags = outarg.open_flags;
128 file->private_data = fuse_file_get(ff);
129
130 return 0;
131}
132EXPORT_SYMBOL_GPL(fuse_do_open);
133
134void fuse_finish_open(struct inode *inode, struct file *file)
135{
136 struct fuse_file *ff = file->private_data;
137
138 if (ff->open_flags & FOPEN_DIRECT_IO)
105 file->f_op = &fuse_direct_io_file_operations; 139 file->f_op = &fuse_direct_io_file_operations;
106 if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) 140 if (!(ff->open_flags & FOPEN_KEEP_CACHE))
107 invalidate_inode_pages2(inode->i_mapping); 141 invalidate_inode_pages2(inode->i_mapping);
108 if (outarg->open_flags & FOPEN_NONSEEKABLE) 142 if (ff->open_flags & FOPEN_NONSEEKABLE)
109 nonseekable_open(inode, file); 143 nonseekable_open(inode, file);
110 ff->fh = outarg->fh;
111 file->private_data = fuse_file_get(ff);
112} 144}
113 145
114int fuse_open_common(struct inode *inode, struct file *file, int isdir) 146int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
115{ 147{
116 struct fuse_conn *fc = get_fuse_conn(inode); 148 struct fuse_conn *fc = get_fuse_conn(inode);
117 struct fuse_open_out outarg;
118 struct fuse_file *ff;
119 int err; 149 int err;
120 150
121 /* VFS checks this, but only _after_ ->open() */ 151 /* VFS checks this, but only _after_ ->open() */
@@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
126 if (err) 156 if (err)
127 return err; 157 return err;
128 158
129 ff = fuse_file_alloc(fc); 159 err = fuse_do_open(fc, get_node_id(inode), file, isdir);
130 if (!ff)
131 return -ENOMEM;
132
133 err = fuse_send_open(inode, file, isdir, &outarg);
134 if (err) 160 if (err)
135 fuse_file_free(ff); 161 return err;
136 else {
137 if (isdir)
138 outarg.open_flags &= ~FOPEN_DIRECT_IO;
139 fuse_finish_open(inode, file, ff, &outarg);
140 }
141 162
142 return err; 163 fuse_finish_open(inode, file);
164
165 return 0;
143} 166}
144 167
145void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) 168static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
146{ 169{
170 struct fuse_conn *fc = ff->fc;
147 struct fuse_req *req = ff->reserved_req; 171 struct fuse_req *req = ff->reserved_req;
148 struct fuse_release_in *inarg = &req->misc.release.in; 172 struct fuse_release_in *inarg = &req->misc.release.in;
149 173
174 spin_lock(&fc->lock);
175 list_del(&ff->write_entry);
176 if (!RB_EMPTY_NODE(&ff->polled_node))
177 rb_erase(&ff->polled_node, &fc->polled_files);
178 spin_unlock(&fc->lock);
179
180 wake_up_interruptible_sync(&ff->poll_wait);
181
150 inarg->fh = ff->fh; 182 inarg->fh = ff->fh;
151 inarg->flags = flags; 183 inarg->flags = flags;
152 req->in.h.opcode = opcode; 184 req->in.h.opcode = opcode;
153 req->in.h.nodeid = nodeid; 185 req->in.h.nodeid = ff->nodeid;
154 req->in.numargs = 1; 186 req->in.numargs = 1;
155 req->in.args[0].size = sizeof(struct fuse_release_in); 187 req->in.args[0].size = sizeof(struct fuse_release_in);
156 req->in.args[0].value = inarg; 188 req->in.args[0].value = inarg;
157} 189}
158 190
159int fuse_release_common(struct inode *inode, struct file *file, int isdir) 191void fuse_release_common(struct file *file, int opcode)
160{ 192{
161 struct fuse_file *ff = file->private_data; 193 struct fuse_file *ff;
162 if (ff) { 194 struct fuse_req *req;
163 struct fuse_conn *fc = get_fuse_conn(inode);
164 struct fuse_req *req = ff->reserved_req;
165
166 fuse_release_fill(ff, get_node_id(inode), file->f_flags,
167 isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
168 195
169 /* Hold vfsmount and dentry until release is finished */ 196 ff = file->private_data;
170 req->misc.release.vfsmount = mntget(file->f_path.mnt); 197 if (unlikely(!ff))
171 req->misc.release.dentry = dget(file->f_path.dentry); 198 return;
172 199
173 spin_lock(&fc->lock); 200 req = ff->reserved_req;
174 list_del(&ff->write_entry); 201 fuse_prepare_release(ff, file->f_flags, opcode);
175 if (!RB_EMPTY_NODE(&ff->polled_node))
176 rb_erase(&ff->polled_node, &fc->polled_files);
177 spin_unlock(&fc->lock);
178 202
179 wake_up_interruptible_sync(&ff->poll_wait); 203 /* Hold vfsmount and dentry until release is finished */
180 /* 204 path_get(&file->f_path);
181 * Normally this will send the RELEASE request, 205 req->misc.release.path = file->f_path;
182 * however if some asynchronous READ or WRITE requests
183 * are outstanding, the sending will be delayed
184 */
185 fuse_file_put(ff);
186 }
187 206
188 /* Return value is ignored by VFS */ 207 /*
189 return 0; 208 * Normally this will send the RELEASE request, however if
209 * some asynchronous READ or WRITE requests are outstanding,
210 * the sending will be delayed.
211 */
212 fuse_file_put(ff);
190} 213}
191 214
192static int fuse_open(struct inode *inode, struct file *file) 215static int fuse_open(struct inode *inode, struct file *file)
193{ 216{
194 return fuse_open_common(inode, file, 0); 217 return fuse_open_common(inode, file, false);
195} 218}
196 219
197static int fuse_release(struct inode *inode, struct file *file) 220static int fuse_release(struct inode *inode, struct file *file)
198{ 221{
199 return fuse_release_common(inode, file, 0); 222 fuse_release_common(file, FUSE_RELEASE);
223
224 /* return value is ignored by VFS */
225 return 0;
226}
227
228void fuse_sync_release(struct fuse_file *ff, int flags)
229{
230 WARN_ON(atomic_read(&ff->count) > 1);
231 fuse_prepare_release(ff, flags, FUSE_RELEASE);
232 ff->reserved_req->force = 1;
233 fuse_request_send(ff->fc, ff->reserved_req);
234 fuse_put_request(ff->fc, ff->reserved_req);
235 kfree(ff);
200} 236}
237EXPORT_SYMBOL_GPL(fuse_sync_release);
201 238
202/* 239/*
203 * Scramble the ID space with XTEA, so that the value of the files_struct 240 * Scramble the ID space with XTEA, so that the value of the files_struct
@@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
371 return fuse_fsync_common(file, de, datasync, 0); 408 return fuse_fsync_common(file, de, datasync, 0);
372} 409}
373 410
374void fuse_read_fill(struct fuse_req *req, struct file *file, 411void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
375 struct inode *inode, loff_t pos, size_t count, int opcode) 412 size_t count, int opcode)
376{ 413{
377 struct fuse_read_in *inarg = &req->misc.read.in; 414 struct fuse_read_in *inarg = &req->misc.read.in;
378 struct fuse_file *ff = file->private_data; 415 struct fuse_file *ff = file->private_data;
@@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
382 inarg->size = count; 419 inarg->size = count;
383 inarg->flags = file->f_flags; 420 inarg->flags = file->f_flags;
384 req->in.h.opcode = opcode; 421 req->in.h.opcode = opcode;
385 req->in.h.nodeid = get_node_id(inode); 422 req->in.h.nodeid = ff->nodeid;
386 req->in.numargs = 1; 423 req->in.numargs = 1;
387 req->in.args[0].size = sizeof(struct fuse_read_in); 424 req->in.args[0].size = sizeof(struct fuse_read_in);
388 req->in.args[0].value = inarg; 425 req->in.args[0].value = inarg;
@@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
392} 429}
393 430
394static size_t fuse_send_read(struct fuse_req *req, struct file *file, 431static size_t fuse_send_read(struct fuse_req *req, struct file *file,
395 struct inode *inode, loff_t pos, size_t count, 432 loff_t pos, size_t count, fl_owner_t owner)
396 fl_owner_t owner)
397{ 433{
398 struct fuse_conn *fc = get_fuse_conn(inode); 434 struct fuse_file *ff = file->private_data;
435 struct fuse_conn *fc = ff->fc;
399 436
400 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 437 fuse_read_fill(req, file, pos, count, FUSE_READ);
401 if (owner != NULL) { 438 if (owner != NULL) {
402 struct fuse_read_in *inarg = &req->misc.read.in; 439 struct fuse_read_in *inarg = &req->misc.read.in;
403 440
@@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page)
455 req->out.argpages = 1; 492 req->out.argpages = 1;
456 req->num_pages = 1; 493 req->num_pages = 1;
457 req->pages[0] = page; 494 req->pages[0] = page;
458 num_read = fuse_send_read(req, file, inode, pos, count, NULL); 495 num_read = fuse_send_read(req, file, pos, count, NULL);
459 err = req->out.h.error; 496 err = req->out.h.error;
460 fuse_put_request(fc, req); 497 fuse_put_request(fc, req);
461 498
@@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
504 fuse_file_put(req->ff); 541 fuse_file_put(req->ff);
505} 542}
506 543
507static void fuse_send_readpages(struct fuse_req *req, struct file *file, 544static void fuse_send_readpages(struct fuse_req *req, struct file *file)
508 struct inode *inode)
509{ 545{
510 struct fuse_conn *fc = get_fuse_conn(inode); 546 struct fuse_file *ff = file->private_data;
547 struct fuse_conn *fc = ff->fc;
511 loff_t pos = page_offset(req->pages[0]); 548 loff_t pos = page_offset(req->pages[0]);
512 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 549 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
513 550
514 req->out.argpages = 1; 551 req->out.argpages = 1;
515 req->out.page_zeroing = 1; 552 req->out.page_zeroing = 1;
516 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 553 fuse_read_fill(req, file, pos, count, FUSE_READ);
517 req->misc.read.attr_ver = fuse_get_attr_version(fc); 554 req->misc.read.attr_ver = fuse_get_attr_version(fc);
518 if (fc->async_read) { 555 if (fc->async_read) {
519 struct fuse_file *ff = file->private_data;
520 req->ff = fuse_file_get(ff); 556 req->ff = fuse_file_get(ff);
521 req->end = fuse_readpages_end; 557 req->end = fuse_readpages_end;
522 fuse_request_send_background(fc, req); 558 fuse_request_send_background(fc, req);
@@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
546 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 582 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
547 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 583 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
548 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 584 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
549 fuse_send_readpages(req, data->file, inode); 585 fuse_send_readpages(req, data->file);
550 data->req = req = fuse_get_req(fc); 586 data->req = req = fuse_get_req(fc);
551 if (IS_ERR(req)) { 587 if (IS_ERR(req)) {
552 unlock_page(page); 588 unlock_page(page);
@@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
580 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 616 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
581 if (!err) { 617 if (!err) {
582 if (data.req->num_pages) 618 if (data.req->num_pages)
583 fuse_send_readpages(data.req, file, inode); 619 fuse_send_readpages(data.req, file);
584 else 620 else
585 fuse_put_request(fc, data.req); 621 fuse_put_request(fc, data.req);
586 } 622 }
@@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
607 return generic_file_aio_read(iocb, iov, nr_segs, pos); 643 return generic_file_aio_read(iocb, iov, nr_segs, pos);
608} 644}
609 645
610static void fuse_write_fill(struct fuse_req *req, struct file *file, 646static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
611 struct fuse_file *ff, struct inode *inode, 647 loff_t pos, size_t count)
612 loff_t pos, size_t count, int writepage)
613{ 648{
614 struct fuse_conn *fc = get_fuse_conn(inode);
615 struct fuse_write_in *inarg = &req->misc.write.in; 649 struct fuse_write_in *inarg = &req->misc.write.in;
616 struct fuse_write_out *outarg = &req->misc.write.out; 650 struct fuse_write_out *outarg = &req->misc.write.out;
617 651
618 memset(inarg, 0, sizeof(struct fuse_write_in));
619 inarg->fh = ff->fh; 652 inarg->fh = ff->fh;
620 inarg->offset = pos; 653 inarg->offset = pos;
621 inarg->size = count; 654 inarg->size = count;
622 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
623 inarg->flags = file ? file->f_flags : 0;
624 req->in.h.opcode = FUSE_WRITE; 655 req->in.h.opcode = FUSE_WRITE;
625 req->in.h.nodeid = get_node_id(inode); 656 req->in.h.nodeid = ff->nodeid;
626 req->in.numargs = 2; 657 req->in.numargs = 2;
627 if (fc->minor < 9) 658 if (ff->fc->minor < 9)
628 req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 659 req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
629 else 660 else
630 req->in.args[0].size = sizeof(struct fuse_write_in); 661 req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
636} 667}
637 668
638static size_t fuse_send_write(struct fuse_req *req, struct file *file, 669static size_t fuse_send_write(struct fuse_req *req, struct file *file,
639 struct inode *inode, loff_t pos, size_t count, 670 loff_t pos, size_t count, fl_owner_t owner)
640 fl_owner_t owner)
641{ 671{
642 struct fuse_conn *fc = get_fuse_conn(inode); 672 struct fuse_file *ff = file->private_data;
643 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); 673 struct fuse_conn *fc = ff->fc;
674 struct fuse_write_in *inarg = &req->misc.write.in;
675
676 fuse_write_fill(req, ff, pos, count);
677 inarg->flags = file->f_flags;
644 if (owner != NULL) { 678 if (owner != NULL) {
645 struct fuse_write_in *inarg = &req->misc.write.in;
646 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 679 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
647 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 680 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
648 } 681 }
@@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
700 req->num_pages = 1; 733 req->num_pages = 1;
701 req->pages[0] = page; 734 req->pages[0] = page;
702 req->page_offset = offset; 735 req->page_offset = offset;
703 nres = fuse_send_write(req, file, inode, pos, count, NULL); 736 nres = fuse_send_write(req, file, pos, count, NULL);
704 err = req->out.h.error; 737 err = req->out.h.error;
705 fuse_put_request(fc, req); 738 fuse_put_request(fc, req);
706 if (!err && !nres) 739 if (!err && !nres)
@@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
741 for (i = 0; i < req->num_pages; i++) 774 for (i = 0; i < req->num_pages; i++)
742 fuse_wait_on_page_writeback(inode, req->pages[i]->index); 775 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
743 776
744 res = fuse_send_write(req, file, inode, pos, count, NULL); 777 res = fuse_send_write(req, file, pos, count, NULL);
745 778
746 offset = req->page_offset; 779 offset = req->page_offset;
747 count = res; 780 count = res;
@@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
979 return 0; 1012 return 0;
980} 1013}
981 1014
982static ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1015ssize_t fuse_direct_io(struct file *file, const char __user *buf,
983 size_t count, loff_t *ppos, int write) 1016 size_t count, loff_t *ppos, int write)
984{ 1017{
985 struct inode *inode = file->f_path.dentry->d_inode; 1018 struct fuse_file *ff = file->private_data;
986 struct fuse_conn *fc = get_fuse_conn(inode); 1019 struct fuse_conn *fc = ff->fc;
987 size_t nmax = write ? fc->max_write : fc->max_read; 1020 size_t nmax = write ? fc->max_write : fc->max_read;
988 loff_t pos = *ppos; 1021 loff_t pos = *ppos;
989 ssize_t res = 0; 1022 ssize_t res = 0;
990 struct fuse_req *req; 1023 struct fuse_req *req;
991 1024
992 if (is_bad_inode(inode))
993 return -EIO;
994
995 req = fuse_get_req(fc); 1025 req = fuse_get_req(fc);
996 if (IS_ERR(req)) 1026 if (IS_ERR(req))
997 return PTR_ERR(req); 1027 return PTR_ERR(req);
998 1028
999 while (count) { 1029 while (count) {
1000 size_t nres; 1030 size_t nres;
1031 fl_owner_t owner = current->files;
1001 size_t nbytes = min(count, nmax); 1032 size_t nbytes = min(count, nmax);
1002 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1033 int err = fuse_get_user_pages(req, buf, &nbytes, write);
1003 if (err) { 1034 if (err) {
@@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1006 } 1037 }
1007 1038
1008 if (write) 1039 if (write)
1009 nres = fuse_send_write(req, file, inode, pos, nbytes, 1040 nres = fuse_send_write(req, file, pos, nbytes, owner);
1010 current->files);
1011 else 1041 else
1012 nres = fuse_send_read(req, file, inode, pos, nbytes, 1042 nres = fuse_send_read(req, file, pos, nbytes, owner);
1013 current->files); 1043
1014 fuse_release_user_pages(req, !write); 1044 fuse_release_user_pages(req, !write);
1015 if (req->out.h.error) { 1045 if (req->out.h.error) {
1016 if (!res) 1046 if (!res)
@@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1034 } 1064 }
1035 } 1065 }
1036 fuse_put_request(fc, req); 1066 fuse_put_request(fc, req);
1037 if (res > 0) { 1067 if (res > 0)
1038 if (write)
1039 fuse_write_update_size(inode, pos);
1040 *ppos = pos; 1068 *ppos = pos;
1041 }
1042 fuse_invalidate_attr(inode);
1043 1069
1044 return res; 1070 return res;
1045} 1071}
1072EXPORT_SYMBOL_GPL(fuse_direct_io);
1046 1073
1047static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1074static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1048 size_t count, loff_t *ppos) 1075 size_t count, loff_t *ppos)
1049{ 1076{
1050 return fuse_direct_io(file, buf, count, ppos, 0); 1077 ssize_t res;
1078 struct inode *inode = file->f_path.dentry->d_inode;
1079
1080 if (is_bad_inode(inode))
1081 return -EIO;
1082
1083 res = fuse_direct_io(file, buf, count, ppos, 0);
1084
1085 fuse_invalidate_attr(inode);
1086
1087 return res;
1051} 1088}
1052 1089
1053static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1090static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1055{ 1092{
1056 struct inode *inode = file->f_path.dentry->d_inode; 1093 struct inode *inode = file->f_path.dentry->d_inode;
1057 ssize_t res; 1094 ssize_t res;
1095
1096 if (is_bad_inode(inode))
1097 return -EIO;
1098
1058 /* Don't allow parallel writes to the same file */ 1099 /* Don't allow parallel writes to the same file */
1059 mutex_lock(&inode->i_mutex); 1100 mutex_lock(&inode->i_mutex);
1060 res = generic_write_checks(file, ppos, &count, 0); 1101 res = generic_write_checks(file, ppos, &count, 0);
1061 if (!res) 1102 if (!res) {
1062 res = fuse_direct_io(file, buf, count, ppos, 1); 1103 res = fuse_direct_io(file, buf, count, ppos, 1);
1104 if (res > 0)
1105 fuse_write_update_size(inode, *ppos);
1106 }
1063 mutex_unlock(&inode->i_mutex); 1107 mutex_unlock(&inode->i_mutex);
1108
1109 fuse_invalidate_attr(inode);
1110
1064 return res; 1111 return res;
1065} 1112}
1066 1113
@@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page)
1177 req->ff = fuse_file_get(ff); 1224 req->ff = fuse_file_get(ff);
1178 spin_unlock(&fc->lock); 1225 spin_unlock(&fc->lock);
1179 1226
1180 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); 1227 fuse_write_fill(req, ff, page_offset(page), 0);
1181 1228
1182 copy_highpage(tmp_page, page); 1229 copy_highpage(tmp_page, page);
1230 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1183 req->in.argpages = 1; 1231 req->in.argpages = 1;
1184 req->num_pages = 1; 1232 req->num_pages = 1;
1185 req->pages[0] = tmp_page; 1233 req->pages[0] = tmp_page;
@@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1603 * limits ioctl data transfers to well-formed ioctls and is the forced 1651 * limits ioctl data transfers to well-formed ioctls and is the forced
1604 * behavior for all FUSE servers. 1652 * behavior for all FUSE servers.
1605 */ 1653 */
1606static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, 1654long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1607 unsigned long arg, unsigned int flags) 1655 unsigned int flags)
1608{ 1656{
1609 struct inode *inode = file->f_dentry->d_inode;
1610 struct fuse_file *ff = file->private_data; 1657 struct fuse_file *ff = file->private_data;
1611 struct fuse_conn *fc = get_fuse_conn(inode); 1658 struct fuse_conn *fc = ff->fc;
1612 struct fuse_ioctl_in inarg = { 1659 struct fuse_ioctl_in inarg = {
1613 .fh = ff->fh, 1660 .fh = ff->fh,
1614 .cmd = cmd, 1661 .cmd = cmd,
@@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
1627 /* assume all the iovs returned by client always fit in a page */ 1674 /* assume all the iovs returned by client always fit in a page */
1628 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 1675 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1629 1676
1630 if (!fuse_allow_task(fc, current))
1631 return -EACCES;
1632
1633 err = -EIO;
1634 if (is_bad_inode(inode))
1635 goto out;
1636
1637 err = -ENOMEM; 1677 err = -ENOMEM;
1638 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); 1678 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
1639 iov_page = alloc_page(GFP_KERNEL); 1679 iov_page = alloc_page(GFP_KERNEL);
@@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
1694 1734
1695 /* okay, let's send it to the client */ 1735 /* okay, let's send it to the client */
1696 req->in.h.opcode = FUSE_IOCTL; 1736 req->in.h.opcode = FUSE_IOCTL;
1697 req->in.h.nodeid = get_node_id(inode); 1737 req->in.h.nodeid = ff->nodeid;
1698 req->in.numargs = 1; 1738 req->in.numargs = 1;
1699 req->in.args[0].size = sizeof(inarg); 1739 req->in.args[0].size = sizeof(inarg);
1700 req->in.args[0].value = &inarg; 1740 req->in.args[0].value = &inarg;
@@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
1777 1817
1778 return err ? err : outarg.result; 1818 return err ? err : outarg.result;
1779} 1819}
1820EXPORT_SYMBOL_GPL(fuse_do_ioctl);
1821
1822static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
1823 unsigned long arg, unsigned int flags)
1824{
1825 struct inode *inode = file->f_dentry->d_inode;
1826 struct fuse_conn *fc = get_fuse_conn(inode);
1827
1828 if (!fuse_allow_task(fc, current))
1829 return -EACCES;
1830
1831 if (is_bad_inode(inode))
1832 return -EIO;
1833
1834 return fuse_do_ioctl(file, cmd, arg, flags);
1835}
1780 1836
1781static long fuse_file_ioctl(struct file *file, unsigned int cmd, 1837static long fuse_file_ioctl(struct file *file, unsigned int cmd,
1782 unsigned long arg) 1838 unsigned long arg)
1783{ 1839{
1784 return fuse_file_do_ioctl(file, cmd, arg, 0); 1840 return fuse_file_ioctl_common(file, cmd, arg, 0);
1785} 1841}
1786 1842
1787static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, 1843static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1788 unsigned long arg) 1844 unsigned long arg)
1789{ 1845{
1790 return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); 1846 return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
1791} 1847}
1792 1848
1793/* 1849/*
@@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
1841 spin_unlock(&fc->lock); 1897 spin_unlock(&fc->lock);
1842} 1898}
1843 1899
1844static unsigned fuse_file_poll(struct file *file, poll_table *wait) 1900unsigned fuse_file_poll(struct file *file, poll_table *wait)
1845{ 1901{
1846 struct inode *inode = file->f_dentry->d_inode;
1847 struct fuse_file *ff = file->private_data; 1902 struct fuse_file *ff = file->private_data;
1848 struct fuse_conn *fc = get_fuse_conn(inode); 1903 struct fuse_conn *fc = ff->fc;
1849 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 1904 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
1850 struct fuse_poll_out outarg; 1905 struct fuse_poll_out outarg;
1851 struct fuse_req *req; 1906 struct fuse_req *req;
@@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
1870 return PTR_ERR(req); 1925 return PTR_ERR(req);
1871 1926
1872 req->in.h.opcode = FUSE_POLL; 1927 req->in.h.opcode = FUSE_POLL;
1873 req->in.h.nodeid = get_node_id(inode); 1928 req->in.h.nodeid = ff->nodeid;
1874 req->in.numargs = 1; 1929 req->in.numargs = 1;
1875 req->in.args[0].size = sizeof(inarg); 1930 req->in.args[0].size = sizeof(inarg);
1876 req->in.args[0].value = &inarg; 1931 req->in.args[0].value = &inarg;
@@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
1889 } 1944 }
1890 return POLLERR; 1945 return POLLERR;
1891} 1946}
1947EXPORT_SYMBOL_GPL(fuse_file_poll);
1892 1948
1893/* 1949/*
1894 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and 1950 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
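
The net effect of the file.c changes is that the I/O paths key off struct fuse_file instead of an inode: fuse_do_open(), fuse_direct_io(), fuse_do_ioctl() and fuse_file_poll() never dereference an inode, which is what lets CUSE reuse them for a character device with no FUSE inode behind it. The is_bad_inode() checks and fuse_invalidate_attr() calls stay with the inode-based wrappers (fuse_direct_read(), fuse_direct_write(), fuse_file_ioctl_common()). A sketch of an inode-less user sitting on the exported helpers; example_fc and the example_cdev_* functions are hypothetical:

static struct fuse_conn *example_fc;	/* set up elsewhere by the driver */

static int example_cdev_open(struct inode *inode, struct file *file)
{
	/* a fixed nodeid stands in for an inode tree that doesn't exist */
	return fuse_do_open(example_fc, FUSE_ROOT_ID, file, false);
}

static ssize_t example_cdev_read(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	return fuse_direct_io(file, buf, count, ppos, 0);
}

static ssize_t example_cdev_write(struct file *file, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	return fuse_direct_io(file, buf, count, ppos, 1);
}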
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fc5aedaa0d5..aaf2f9ff970e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -97,8 +97,13 @@ struct fuse_inode {
97 struct list_head writepages; 97 struct list_head writepages;
98}; 98};
99 99
100struct fuse_conn;
101
100/** FUSE specific file data */ 102/** FUSE specific file data */
101struct fuse_file { 103struct fuse_file {
104 /** Fuse connection for this file */
105 struct fuse_conn *fc;
106
102 /** Request reserved for flush and release */ 107 /** Request reserved for flush and release */
103 struct fuse_req *reserved_req; 108 struct fuse_req *reserved_req;
104 109
@@ -108,9 +113,15 @@ struct fuse_file {
108 /** File handle used by userspace */ 113 /** File handle used by userspace */
109 u64 fh; 114 u64 fh;
110 115
116 /** Node id of this file */
117 u64 nodeid;
118
111 /** Refcount */ 119 /** Refcount */
112 atomic_t count; 120 atomic_t count;
113 121
122 /** FOPEN_* flags returned by open */
123 u32 open_flags;
124
114 /** Entry on inode's write_files list */ 125 /** Entry on inode's write_files list */
115 struct list_head write_entry; 126 struct list_head write_entry;
116 127
@@ -185,8 +196,6 @@ enum fuse_req_state {
185 FUSE_REQ_FINISHED 196 FUSE_REQ_FINISHED
186}; 197};
187 198
188struct fuse_conn;
189
190/** 199/**
191 * A request to the client 200 * A request to the client
192 */ 201 */
@@ -248,11 +257,12 @@ struct fuse_req {
248 struct fuse_forget_in forget_in; 257 struct fuse_forget_in forget_in;
249 struct { 258 struct {
250 struct fuse_release_in in; 259 struct fuse_release_in in;
251 struct vfsmount *vfsmount; 260 struct path path;
252 struct dentry *dentry;
253 } release; 261 } release;
254 struct fuse_init_in init_in; 262 struct fuse_init_in init_in;
255 struct fuse_init_out init_out; 263 struct fuse_init_out init_out;
264 struct cuse_init_in cuse_init_in;
265 struct cuse_init_out cuse_init_out;
256 struct { 266 struct {
257 struct fuse_read_in in; 267 struct fuse_read_in in;
258 u64 attr_ver; 268 u64 attr_ver;
@@ -386,6 +396,9 @@ struct fuse_conn {
386 /** Filesystem supports NFS exporting. Only set in INIT */ 396 /** Filesystem supports NFS exporting. Only set in INIT */
387 unsigned export_support:1; 397 unsigned export_support:1;
388 398
399 /** Set if bdi is valid */
400 unsigned bdi_initialized:1;
401
389 /* 402 /*
390 * The following bitfields are only for optimization purposes 403 * The following bitfields are only for optimization purposes
391 * and hence races in setting them will not cause malfunction 404 * and hence races in setting them will not cause malfunction
@@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
515 * Initialize READ or READDIR request 528 * Initialize READ or READDIR request
516 */ 529 */
517void fuse_read_fill(struct fuse_req *req, struct file *file, 530void fuse_read_fill(struct fuse_req *req, struct file *file,
518 struct inode *inode, loff_t pos, size_t count, int opcode); 531 loff_t pos, size_t count, int opcode);
519 532
520/** 533/**
521 * Send OPEN or OPENDIR request 534 * Send OPEN or OPENDIR request
522 */ 535 */
523int fuse_open_common(struct inode *inode, struct file *file, int isdir); 536int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
524 537
525struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); 538struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
539struct fuse_file *fuse_file_get(struct fuse_file *ff);
526void fuse_file_free(struct fuse_file *ff); 540void fuse_file_free(struct fuse_file *ff);
527void fuse_finish_open(struct inode *inode, struct file *file, 541void fuse_finish_open(struct inode *inode, struct file *file);
528 struct fuse_file *ff, struct fuse_open_out *outarg);
529 542
530/** Fill in ff->reserved_req with a RELEASE request */ 543void fuse_sync_release(struct fuse_file *ff, int flags);
531void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
532 544
533/** 545/**
534 * Send RELEASE or RELEASEDIR request 546 * Send RELEASE or RELEASEDIR request
535 */ 547 */
536int fuse_release_common(struct inode *inode, struct file *file, int isdir); 548void fuse_release_common(struct file *file, int opcode);
537 549
538/** 550/**
539 * Send FSYNC or FSYNCDIR request 551 * Send FSYNC or FSYNCDIR request
@@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
652 */ 664 */
653struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); 665struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
654 666
667void fuse_conn_kill(struct fuse_conn *fc);
668
655/** 669/**
656 * Initialize fuse_conn 670 * Initialize fuse_conn
657 */ 671 */
658int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); 672void fuse_conn_init(struct fuse_conn *fc);
659 673
660/** 674/**
661 * Release reference to fuse_conn 675 * Release reference to fuse_conn
@@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode);
694 708
695u64 fuse_get_attr_version(struct fuse_conn *fc); 709u64 fuse_get_attr_version(struct fuse_conn *fc);
696 710
711int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
712 bool isdir);
713ssize_t fuse_direct_io(struct file *file, const char __user *buf,
714 size_t count, loff_t *ppos, int write);
715long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
716 unsigned int flags);
717unsigned fuse_file_poll(struct file *file, poll_table *wait);
718int fuse_dev_release(struct inode *inode, struct file *file);
719
697#endif /* _FS_FUSE_I_H */ 720#endif /* _FS_FUSE_I_H */
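
The header now spells out the fuse_file lifecycle the release rework depends on: file->private_data holds one reference, taken with fuse_file_get() at open time; asynchronous requests take extra references; and the RELEASE request parked in ff->reserved_req goes out only when the last reference drops. In outline (fragments from the surrounding hunks, not a standalone function):

	file->private_data = fuse_file_get(ff);	/* open: reference #1 */

	req->ff = fuse_file_get(ff);		/* async READ/WRITE pins ff */
	fuse_request_send_background(fc, req);	/* ref dropped in req->end */

	fuse_release_common(file, FUSE_RELEASE);	/* drops reference #1;
							   RELEASE goes out once
							   the count reaches 0 */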
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 91f7c85f1ffd..f0df55a52929 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc)
277 } 277 }
278} 278}
279 279
280static void fuse_put_super(struct super_block *sb) 280static void fuse_bdi_destroy(struct fuse_conn *fc)
281{ 281{
282 struct fuse_conn *fc = get_fuse_conn_super(sb); 282 if (fc->bdi_initialized)
283 bdi_destroy(&fc->bdi);
284}
283 285
284 fuse_send_destroy(fc); 286void fuse_conn_kill(struct fuse_conn *fc)
287{
285 spin_lock(&fc->lock); 288 spin_lock(&fc->lock);
286 fc->connected = 0; 289 fc->connected = 0;
287 fc->blocked = 0; 290 fc->blocked = 0;
@@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb)
295 list_del(&fc->entry); 298 list_del(&fc->entry);
296 fuse_ctl_remove_conn(fc); 299 fuse_ctl_remove_conn(fc);
297 mutex_unlock(&fuse_mutex); 300 mutex_unlock(&fuse_mutex);
298 bdi_destroy(&fc->bdi); 301 fuse_bdi_destroy(fc);
302}
303EXPORT_SYMBOL_GPL(fuse_conn_kill);
304
305static void fuse_put_super(struct super_block *sb)
306{
307 struct fuse_conn *fc = get_fuse_conn_super(sb);
308
309 fuse_send_destroy(fc);
310 fuse_conn_kill(fc);
299 fuse_conn_put(fc); 311 fuse_conn_put(fc);
300} 312}
301 313
@@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
466 return 0; 478 return 0;
467} 479}
468 480
469int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) 481void fuse_conn_init(struct fuse_conn *fc)
470{ 482{
471 int err;
472
473 memset(fc, 0, sizeof(*fc)); 483 memset(fc, 0, sizeof(*fc));
474 spin_lock_init(&fc->lock); 484 spin_lock_init(&fc->lock);
475 mutex_init(&fc->inst_mutex); 485 mutex_init(&fc->inst_mutex);
@@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
484 INIT_LIST_HEAD(&fc->bg_queue); 494 INIT_LIST_HEAD(&fc->bg_queue);
485 INIT_LIST_HEAD(&fc->entry); 495 INIT_LIST_HEAD(&fc->entry);
486 atomic_set(&fc->num_waiting, 0); 496 atomic_set(&fc->num_waiting, 0);
487 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
488 fc->bdi.unplug_io_fn = default_unplug_io_fn;
489 /* fuse does its own writeback accounting */
490 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
491 fc->khctr = 0; 497 fc->khctr = 0;
492 fc->polled_files = RB_ROOT; 498 fc->polled_files = RB_ROOT;
493 fc->dev = sb->s_dev;
494 err = bdi_init(&fc->bdi);
495 if (err)
496 goto error_mutex_destroy;
497 if (sb->s_bdev) {
498 err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
499 MAJOR(fc->dev), MINOR(fc->dev));
500 } else {
501 err = bdi_register_dev(&fc->bdi, fc->dev);
502 }
503 if (err)
504 goto error_bdi_destroy;
505 /*
506 * For a single fuse filesystem use max 1% of dirty +
507 * writeback threshold.
508 *
509 * This gives about 1M of write buffer for memory maps on a
510 * machine with 1G and 10% dirty_ratio, which should be more
511 * than enough.
512 *
513 * Privileged users can raise it by writing to
514 *
515 * /sys/class/bdi/<bdi>/max_ratio
516 */
517 bdi_set_max_ratio(&fc->bdi, 1);
518 fc->reqctr = 0; 499 fc->reqctr = 0;
519 fc->blocked = 1; 500 fc->blocked = 1;
520 fc->attr_version = 1; 501 fc->attr_version = 1;
521 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 502 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
522
523 return 0;
524
525 error_bdi_destroy:
526 bdi_destroy(&fc->bdi);
527 error_mutex_destroy:
528 mutex_destroy(&fc->inst_mutex);
529 return err;
530} 503}
531EXPORT_SYMBOL_GPL(fuse_conn_init); 504EXPORT_SYMBOL_GPL(fuse_conn_init);
532 505
@@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc)
539 fc->release(fc); 512 fc->release(fc);
540 } 513 }
541} 514}
515EXPORT_SYMBOL_GPL(fuse_conn_put);
542 516
543struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 517struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
544{ 518{
545 atomic_inc(&fc->count); 519 atomic_inc(&fc->count);
546 return fc; 520 return fc;
547} 521}
522EXPORT_SYMBOL_GPL(fuse_conn_get);
548 523
549static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 524static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
550{ 525{
@@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc)
797 kfree(fc); 772 kfree(fc);
798} 773}
799 774
775static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
776{
777 int err;
778
779 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
780 fc->bdi.unplug_io_fn = default_unplug_io_fn;
781 /* fuse does its own writeback accounting */
782 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
783
784 err = bdi_init(&fc->bdi);
785 if (err)
786 return err;
787
788 fc->bdi_initialized = 1;
789
790 if (sb->s_bdev) {
791 err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
792 MAJOR(fc->dev), MINOR(fc->dev));
793 } else {
794 err = bdi_register_dev(&fc->bdi, fc->dev);
795 }
796
797 if (err)
798 return err;
799
800 /*
801 * For a single fuse filesystem use max 1% of dirty +
802 * writeback threshold.
803 *
804 * This gives about 1M of write buffer for memory maps on a
805 * machine with 1G and 10% dirty_ratio, which should be more
806 * than enough.
807 *
808 * Privileged users can raise it by writing to
809 *
810 * /sys/class/bdi/<bdi>/max_ratio
811 */
812 bdi_set_max_ratio(&fc->bdi, 1);
813
814 return 0;
815}
816
800static int fuse_fill_super(struct super_block *sb, void *data, int silent) 817static int fuse_fill_super(struct super_block *sb, void *data, int silent)
801{ 818{
802 struct fuse_conn *fc; 819 struct fuse_conn *fc;
@@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
843 if (!fc) 860 if (!fc)
844 goto err_fput; 861 goto err_fput;
845 862
846 err = fuse_conn_init(fc, sb); 863 fuse_conn_init(fc);
847 if (err) { 864
848 kfree(fc); 865 fc->dev = sb->s_dev;
849 goto err_fput; 866 err = fuse_bdi_init(fc, sb);
850 } 867 if (err)
868 goto err_put_conn;
851 869
852 fc->release = fuse_free_conn; 870 fc->release = fuse_free_conn;
853 fc->flags = d.flags; 871 fc->flags = d.flags;
@@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
911 err_put_root: 929 err_put_root:
912 dput(root_dentry); 930 dput(root_dentry);
913 err_put_conn: 931 err_put_conn:
914 bdi_destroy(&fc->bdi); 932 fuse_bdi_destroy(fc);
915 fuse_conn_put(fc); 933 fuse_conn_put(fc);
916 err_fput: 934 err_fput:
917 fput(file); 935 fput(file);
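
fuse_conn_init() loses both its super_block argument and its failure mode: everything bdi-related moves to the new fuse_bdi_init(), which also sets fc->bdi_initialized so that teardown (fuse_bdi_destroy()) and the congestion checks in dev.c know whether a bdi exists at all. Mount setup thus becomes a two-step sequence, of which a bdi-less connection (CUSE) performs only the first. Schematically, from fuse_fill_super() above:

	fuse_conn_init(fc);		/* can no longer fail */
	fc->dev = sb->s_dev;
	err = fuse_bdi_init(fc, sb);	/* static helper; only mounts call it */
	if (err)
		goto err_put_conn;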
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index d53a9bea1c2f..3da2f1f4f738 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
1EXTRA_CFLAGS := -I$(src)
1obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o inode.o log.o lops.o main.o meta_io.o \
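
That one-line Makefile change is load-bearing for the tracing series: EXTRA_CFLAGS := -I$(src) puts the gfs2 source directory on the include path because trace_gfs2.h is included twice, once normally by each gfs2 .c file and once more through <trace/define_trace.h> with TRACE_INCLUDE_PATH set to '.' when glock.c defines CREATE_TRACE_POINTS (see the glock.c hunk below). The new trace_gfs2.h itself is not shown in this section; it follows the standard TRACE_EVENT boilerplate, a skeleton of which looks like this (the event is illustrative, not one of the real gfs2_* events):

#undef TRACE_SYSTEM
#define TRACE_SYSTEM gfs2

#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_GFS2_H

#include <linux/tracepoint.h>

TRACE_EVENT(gfs2_example,
	TP_PROTO(int delta),
	TP_ARGS(delta),
	TP_STRUCT__entry(
		__field(int, delta)
	),
	TP_fast_assign(
		__entry->delta = delta;
	),
	TP_printk("delta=%d", __entry->delta)
);

#endif /* _TRACE_GFS2_H */

/* this part must stay outside the multi-read protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_gfs2
#include <trace/define_trace.h>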
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 329763530dc0..6d47379e794b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,6 +25,7 @@
25#include "trans.h" 25#include "trans.h"
26#include "dir.h" 26#include "dir.h"
27#include "util.h" 27#include "util.h"
28#include "trace_gfs2.h"
28 29
29/* This doesn't need to be that large as max 64 bit pointers in a 4k 30/* This doesn't need to be that large as max 64 bit pointers in a 4k
30 * block is 512, so __u16 is fine for that. It saves stack space to 31 * block is 512, so __u16 is fine for that. It saves stack space to
@@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
589 clear_buffer_mapped(bh_map); 590 clear_buffer_mapped(bh_map);
590 clear_buffer_new(bh_map); 591 clear_buffer_new(bh_map);
591 clear_buffer_boundary(bh_map); 592 clear_buffer_boundary(bh_map);
593 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
592 if (gfs2_is_dir(ip)) { 594 if (gfs2_is_dir(ip)) {
593 bsize = sdp->sd_jbsize; 595 bsize = sdp->sd_jbsize;
594 arr = sdp->sd_jheightsize; 596 arr = sdp->sd_jheightsize;
@@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
623 ret = 0; 625 ret = 0;
624out: 626out:
625 release_metapath(&mp); 627 release_metapath(&mp);
628 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
626 bmap_unlock(ip, create); 629 bmap_unlock(ip, create);
627 return ret; 630 return ret;
628 631
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2bf62bcc5181..297421c0427a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
39#include "super.h" 39#include "super.h"
40#include "util.h" 40#include "util.h"
41#include "bmap.h" 41#include "bmap.h"
42#define CREATE_TRACE_POINTS
43#include "trace_gfs2.h"
42 44
43struct gfs2_gl_hash_bucket { 45struct gfs2_gl_hash_bucket {
44 struct hlist_head hb_list; 46 struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
155 157
156 if (aspace) 158 if (aspace)
157 gfs2_aspace_put(aspace); 159 gfs2_aspace_put(aspace);
158 160 trace_gfs2_glock_put(gl);
159 sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); 161 sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
160} 162}
161 163
@@ -317,14 +319,17 @@ restart:
317 return 2; 319 return 2;
318 gh->gh_error = ret; 320 gh->gh_error = ret;
319 list_del_init(&gh->gh_list); 321 list_del_init(&gh->gh_list);
322 trace_gfs2_glock_queue(gh, 0);
320 gfs2_holder_wake(gh); 323 gfs2_holder_wake(gh);
321 goto restart; 324 goto restart;
322 } 325 }
323 set_bit(HIF_HOLDER, &gh->gh_iflags); 326 set_bit(HIF_HOLDER, &gh->gh_iflags);
327 trace_gfs2_promote(gh, 1);
324 gfs2_holder_wake(gh); 328 gfs2_holder_wake(gh);
325 goto restart; 329 goto restart;
326 } 330 }
327 set_bit(HIF_HOLDER, &gh->gh_iflags); 331 set_bit(HIF_HOLDER, &gh->gh_iflags);
332 trace_gfs2_promote(gh, 0);
328 gfs2_holder_wake(gh); 333 gfs2_holder_wake(gh);
329 continue; 334 continue;
330 } 335 }
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
354 else 359 else
355 continue; 360 continue;
356 list_del_init(&gh->gh_list); 361 list_del_init(&gh->gh_list);
362 trace_gfs2_glock_queue(gh, 0);
357 gfs2_holder_wake(gh); 363 gfs2_holder_wake(gh);
358 } 364 }
359} 365}
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
422 int rv; 428 int rv;
423 429
424 spin_lock(&gl->gl_spin); 430 spin_lock(&gl->gl_spin);
431 trace_gfs2_glock_state_change(gl, state);
425 state_change(gl, state); 432 state_change(gl, state);
426 gh = find_first_waiter(gl); 433 gh = find_first_waiter(gl);
427 434
@@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
851 gl->gl_demote_state != state) { 858 gl->gl_demote_state != state) {
852 gl->gl_demote_state = LM_ST_UNLOCKED; 859 gl->gl_demote_state = LM_ST_UNLOCKED;
853 } 860 }
861 trace_gfs2_demote_rq(gl);
854} 862}
855 863
856/** 864/**
@@ -936,6 +944,7 @@ fail:
936 goto do_cancel; 944 goto do_cancel;
937 return; 945 return;
938 } 946 }
947 trace_gfs2_glock_queue(gh, 1);
939 list_add_tail(&gh->gh_list, insert_pt); 948 list_add_tail(&gh->gh_list, insert_pt);
940do_cancel: 949do_cancel:
941 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 950 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1032 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1041 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1033 fast_path = 1; 1042 fast_path = 1;
1034 } 1043 }
1044 trace_gfs2_glock_queue(gh, 0);
1035 spin_unlock(&gl->gl_spin); 1045 spin_unlock(&gl->gl_spin);
1036 if (likely(fast_path)) 1046 if (likely(fast_path))
1037 return; 1047 return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f2e449c595b4..13c6237c5f67 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
28#include "meta_io.h" 28#include "meta_io.h"
29#include "util.h" 29#include "util.h"
30#include "dir.h" 30#include "dir.h"
31#include "trace_gfs2.h"
31 32
32#define PULL 1 33#define PULL 1
33 34
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
313 gfs2_log_lock(sdp); 314 gfs2_log_lock(sdp);
314 } 315 }
315 atomic_sub(blks, &sdp->sd_log_blks_free); 316 atomic_sub(blks, &sdp->sd_log_blks_free);
317 trace_gfs2_log_blocks(sdp, -blks);
316 gfs2_log_unlock(sdp); 318 gfs2_log_unlock(sdp);
317 mutex_unlock(&sdp->sd_log_reserve_mutex); 319 mutex_unlock(&sdp->sd_log_reserve_mutex);
318 320
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
333 335
334 gfs2_log_lock(sdp); 336 gfs2_log_lock(sdp);
335 atomic_add(blks, &sdp->sd_log_blks_free); 337 atomic_add(blks, &sdp->sd_log_blks_free);
338 trace_gfs2_log_blocks(sdp, blks);
336 gfs2_assert_withdraw(sdp, 339 gfs2_assert_withdraw(sdp,
337 atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); 340 atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
338 gfs2_log_unlock(sdp); 341 gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
558 561
559 gfs2_log_lock(sdp); 562 gfs2_log_lock(sdp);
560 atomic_add(dist, &sdp->sd_log_blks_free); 563 atomic_add(dist, &sdp->sd_log_blks_free);
564 trace_gfs2_log_blocks(sdp, dist);
561 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); 565 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
562 gfs2_log_unlock(sdp); 566 gfs2_log_unlock(sdp);
563 567
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
715 up_write(&sdp->sd_log_flush_lock); 719 up_write(&sdp->sd_log_flush_lock);
716 return; 720 return;
717 } 721 }
722 trace_gfs2_log_flush(sdp, 1);
718 723
719 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); 724 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
720 INIT_LIST_HEAD(&ai->ai_ail1_list); 725 INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
746 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 751 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
747 gfs2_log_lock(sdp); 752 gfs2_log_lock(sdp);
748 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 753 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
754 trace_gfs2_log_blocks(sdp, -1);
749 gfs2_log_unlock(sdp); 755 gfs2_log_unlock(sdp);
750 log_write_header(sdp, 0, PULL); 756 log_write_header(sdp, 0, PULL);
751 } 757 }
@@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
763 ai = NULL; 769 ai = NULL;
764 } 770 }
765 gfs2_log_unlock(sdp); 771 gfs2_log_unlock(sdp);
766 772 trace_gfs2_log_flush(sdp, 0);
767 up_write(&sdp->sd_log_flush_lock); 773 up_write(&sdp->sd_log_flush_lock);
768 774
769 kfree(ai); 775 kfree(ai);
@@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
787 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); 793 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
788 unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; 794 unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
789 atomic_add(unused, &sdp->sd_log_blks_free); 795 atomic_add(unused, &sdp->sd_log_blks_free);
796 trace_gfs2_log_blocks(sdp, unused);
790 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= 797 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
791 sdp->sd_jdesc->jd_blocks); 798 sdp->sd_jdesc->jd_blocks);
792 sdp->sd_log_blks_reserved = reserved; 799 sdp->sd_log_blks_reserved = reserved;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 00315f50fa46..9969ff062c5b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,6 +27,7 @@
27#include "rgrp.h" 27#include "rgrp.h"
28#include "trans.h" 28#include "trans.h"
29#include "util.h" 29#include "util.h"
30#include "trace_gfs2.h"
30 31
31/** 32/**
32 * gfs2_pin - Pin a buffer in memory 33 * gfs2_pin - Pin a buffer in memory
@@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
53 if (bd->bd_ail) 54 if (bd->bd_ail)
54 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
55 get_bh(bh); 56 get_bh(bh);
57 trace_gfs2_pin(bd, 1);
56} 58}
57 59
58/** 60/**
@@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
89 bd->bd_ail = ai; 91 bd->bd_ail = ai;
90 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 92 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
91 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 93 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
94 trace_gfs2_pin(bd, 0);
92 gfs2_log_unlock(sdp); 95 gfs2_log_unlock(sdp);
93 unlock_buffer(bh); 96 unlock_buffer(bh);
94} 97}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cc34f271b3e7..7bc3c45cd676 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
33#include "log.h" 33#include "log.h"
34#include "quota.h" 34#include "quota.h"
35#include "dir.h" 35#include "dir.h"
36#include "trace_gfs2.h"
36 37
37#define DO 0 38#define DO 0
38#define UNDO 1 39#define UNDO 1
@@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
775 /* Map the extents for this journal's blocks */ 776 /* Map the extents for this journal's blocks */
776 map_journal_extents(sdp); 777 map_journal_extents(sdp);
777 } 778 }
779 trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
778 780
779 if (sdp->sd_lockstruct.ls_first) { 781 if (sdp->sd_lockstruct.ls_first) {
780 unsigned int x; 782 unsigned int x;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index de3239731db8..daa4ae341a29 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,6 +29,7 @@
29#include "util.h" 29#include "util.h"
30#include "log.h" 30#include "log.h"
31#include "inode.h" 31#include "inode.h"
32#include "trace_gfs2.h"
32 33
33#define BFITNOENT ((u32)~0) 34#define BFITNOENT ((u32)~0)
34#define NO_BLOCK ((u64)~0) 35#define NO_BLOCK ((u64)~0)
@@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
1519 spin_lock(&sdp->sd_rindex_spin); 1520 spin_lock(&sdp->sd_rindex_spin);
1520 rgd->rd_free_clone -= *n; 1521 rgd->rd_free_clone -= *n;
1521 spin_unlock(&sdp->sd_rindex_spin); 1522 spin_unlock(&sdp->sd_rindex_spin);
1522 1523 trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
1523 *bn = block; 1524 *bn = block;
1524 return 0; 1525 return 0;
1525 1526
@@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1571 spin_lock(&sdp->sd_rindex_spin); 1572 spin_lock(&sdp->sd_rindex_spin);
1572 rgd->rd_free_clone--; 1573 rgd->rd_free_clone--;
1573 spin_unlock(&sdp->sd_rindex_spin); 1574 spin_unlock(&sdp->sd_rindex_spin);
1574 1575 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
1575 return block; 1576 return block;
1576} 1577}
1577 1578
@@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1591 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 1592 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1592 if (!rgd) 1593 if (!rgd)
1593 return; 1594 return;
1594 1595 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1595 rgd->rd_free += blen; 1596 rgd->rd_free += blen;
1596 1597
1597 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1598 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1619 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 1620 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1620 if (!rgd) 1621 if (!rgd)
1621 return; 1622 return;
1622 1623 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1623 rgd->rd_free += blen; 1624 rgd->rd_free += blen;
1624 1625
1625 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1626 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode)
1642 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1643 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1643 if (!rgd) 1644 if (!rgd)
1644 return; 1645 return;
1646 trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
1645 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1647 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1646 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1648 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1647 gfs2_trans_add_rg(rgd); 1649 gfs2_trans_add_rg(rgd);
@@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1673void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1675void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1674{ 1676{
1675 gfs2_free_uninit_di(rgd, ip->i_no_addr); 1677 gfs2_free_uninit_di(rgd, ip->i_no_addr);
1678 trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
1676 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1679 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1677 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 1680 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1678} 1681}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c8930b31cdf0..0a6801336470 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -719,8 +719,6 @@ static void gfs2_put_super(struct super_block *sb)
719 int error; 719 int error;
720 struct gfs2_jdesc *jd; 720 struct gfs2_jdesc *jd;
721 721
722 lock_kernel();
723
724 /* Unfreeze the filesystem, if we need to */ 722 /* Unfreeze the filesystem, if we need to */
725 723
726 mutex_lock(&sdp->sd_freeze_lock); 724 mutex_lock(&sdp->sd_freeze_lock);
@@ -787,8 +785,6 @@ restart:
787 785
788 /* At this point, we're through participating in the lockspace */ 786 /* At this point, we're through participating in the lockspace */
789 gfs2_sys_fs_del(sdp); 787 gfs2_sys_fs_del(sdp);
790
791 unlock_kernel();
792} 788}
793 789
794/** 790/**
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 000000000000..98d6ef1c1dc0
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
1#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_GFS2_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM gfs2
8#define TRACE_INCLUDE_FILE trace_gfs2
9
10#include <linux/fs.h>
11#include <linux/buffer_head.h>
12#include <linux/dlmconstants.h>
13#include <linux/gfs2_ondisk.h>
14#include "incore.h"
15#include "glock.h"
16
17#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
18#define glock_trace_name(x) __print_symbolic(x, \
19 dlm_state_name(IV), \
20 dlm_state_name(NL), \
21 dlm_state_name(CR), \
22 dlm_state_name(CW), \
23 dlm_state_name(PR), \
24 dlm_state_name(PW), \
25 dlm_state_name(EX))
26
27#define block_state_name(x) __print_symbolic(x, \
28 { GFS2_BLKST_FREE, "free" }, \
29 { GFS2_BLKST_USED, "used" }, \
30 { GFS2_BLKST_DINODE, "dinode" }, \
31 { GFS2_BLKST_UNLINKED, "unlinked" })
32
33#define show_glock_flags(flags) __print_flags(flags, "", \
34 {(1UL << GLF_LOCK), "l" }, \
35 {(1UL << GLF_DEMOTE), "D" }, \
36 {(1UL << GLF_PENDING_DEMOTE), "d" }, \
37 {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
38 {(1UL << GLF_DIRTY), "y" }, \
39 {(1UL << GLF_LFLUSH), "f" }, \
40 {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
41 {(1UL << GLF_REPLY_PENDING), "r" }, \
42 {(1UL << GLF_INITIAL), "I" }, \
43 {(1UL << GLF_FROZEN), "F" })
44
45#ifndef NUMPTY
46#define NUMPTY
47static inline u8 glock_trace_state(unsigned int state)
48{
49 switch(state) {
50 case LM_ST_SHARED:
51 return DLM_LOCK_PR;
52 case LM_ST_DEFERRED:
53 return DLM_LOCK_CW;
54 case LM_ST_EXCLUSIVE:
55 return DLM_LOCK_EX;
56 }
57 return DLM_LOCK_NL;
58}
59#endif
60
61/* Section 1 - Locking
62 *
63 * Objectives:
64 * Latency: Remote demote request to state change
65 * Latency: Local lock request to state change
66 * Latency: State change to lock grant
67 * Correctness: Ordering of local lock state vs. I/O requests
68 * Correctness: Responses to remote demote requests
69 */
70
71/* General glock state change (DLM lock request completes) */
72TRACE_EVENT(gfs2_glock_state_change,
73
74 TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
75
76 TP_ARGS(gl, new_state),
77
78 TP_STRUCT__entry(
79 __field( dev_t, dev )
80 __field( u64, glnum )
81 __field( u32, gltype )
82 __field( u8, cur_state )
83 __field( u8, new_state )
84 __field( u8, dmt_state )
85 __field( u8, tgt_state )
86 __field( unsigned long, flags )
87 ),
88
89 TP_fast_assign(
90 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
91 __entry->glnum = gl->gl_name.ln_number;
92 __entry->gltype = gl->gl_name.ln_type;
93 __entry->cur_state = glock_trace_state(gl->gl_state);
94 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags;
98 ),
99
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
101 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
102 (unsigned long long)__entry->glnum,
103 glock_trace_name(__entry->cur_state),
104 glock_trace_name(__entry->new_state),
105 glock_trace_name(__entry->tgt_state),
106 glock_trace_name(__entry->dmt_state),
107 show_glock_flags(__entry->flags))
108);
109
110/* State change -> unlocked, glock is being deallocated */
111TRACE_EVENT(gfs2_glock_put,
112
113 TP_PROTO(const struct gfs2_glock *gl),
114
115 TP_ARGS(gl),
116
117 TP_STRUCT__entry(
118 __field( dev_t, dev )
119 __field( u64, glnum )
120 __field( u32, gltype )
121 __field( u8, cur_state )
122 __field( unsigned long, flags )
123 ),
124
125 TP_fast_assign(
126 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
127 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags;
131 ),
132
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
134 MAJOR(__entry->dev), MINOR(__entry->dev),
135 __entry->gltype, (unsigned long long)__entry->glnum,
136 glock_trace_name(__entry->cur_state),
137 glock_trace_name(DLM_LOCK_IV),
138 show_glock_flags(__entry->flags))
139
140);
141
142/* Callback (local or remote) requesting lock demotion */
143TRACE_EVENT(gfs2_demote_rq,
144
145 TP_PROTO(const struct gfs2_glock *gl),
146
147 TP_ARGS(gl),
148
149 TP_STRUCT__entry(
150 __field( dev_t, dev )
151 __field( u64, glnum )
152 __field( u32, gltype )
153 __field( u8, cur_state )
154 __field( u8, dmt_state )
155 __field( unsigned long, flags )
156 ),
157
158 TP_fast_assign(
159 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
160 __entry->gltype = gl->gl_name.ln_type;
161 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags;
165 ),
166
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
168 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
169 (unsigned long long)__entry->glnum,
170 glock_trace_name(__entry->cur_state),
171 glock_trace_name(__entry->dmt_state),
172 show_glock_flags(__entry->flags))
173
174);
175
176/* Promotion/grant of a glock */
177TRACE_EVENT(gfs2_promote,
178
179 TP_PROTO(const struct gfs2_holder *gh, int first),
180
181 TP_ARGS(gh, first),
182
183 TP_STRUCT__entry(
184 __field( dev_t, dev )
185 __field( u64, glnum )
186 __field( u32, gltype )
187 __field( int, first )
188 __field( u8, state )
189 ),
190
191 TP_fast_assign(
192 __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
193 __entry->glnum = gh->gh_gl->gl_name.ln_number;
194 __entry->gltype = gh->gh_gl->gl_name.ln_type;
195 __entry->first = first;
196 __entry->state = glock_trace_state(gh->gh_state);
197 ),
198
199 TP_printk("%u,%u glock %u:%llu promote %s %s",
200 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
201 (unsigned long long)__entry->glnum,
202 __entry->first ? "first": "other",
203 glock_trace_name(__entry->state))
204);
205
206/* Queue/dequeue a lock request */
207TRACE_EVENT(gfs2_glock_queue,
208
209 TP_PROTO(const struct gfs2_holder *gh, int queue),
210
211 TP_ARGS(gh, queue),
212
213 TP_STRUCT__entry(
214 __field( dev_t, dev )
215 __field( u64, glnum )
216 __field( u32, gltype )
217 __field( int, queue )
218 __field( u8, state )
219 ),
220
221 TP_fast_assign(
222 __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
223 __entry->glnum = gh->gh_gl->gl_name.ln_number;
224 __entry->gltype = gh->gh_gl->gl_name.ln_type;
225 __entry->queue = queue;
226 __entry->state = glock_trace_state(gh->gh_state);
227 ),
228
229 TP_printk("%u,%u glock %u:%llu %squeue %s",
230 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
231 (unsigned long long)__entry->glnum,
232 __entry->queue ? "" : "de",
233 glock_trace_name(__entry->state))
234);
235
236/* Section 2 - Log/journal
237 *
238 * Objectives:
239 * Latency: Log flush time
240 * Correctness: pin/unpin vs. disk I/O ordering
241 * Performance: Log usage stats
242 */
243
244/* Pin/unpin a block in the log */
245TRACE_EVENT(gfs2_pin,
246
247 TP_PROTO(const struct gfs2_bufdata *bd, int pin),
248
249 TP_ARGS(bd, pin),
250
251 TP_STRUCT__entry(
252 __field( dev_t, dev )
253 __field( int, pin )
254 __field( u32, len )
255 __field( sector_t, block )
256 __field( u64, ino )
257 ),
258
259 TP_fast_assign(
260 __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev;
261 __entry->pin = pin;
262 __entry->len = bd->bd_bh->b_size;
263 __entry->block = bd->bd_bh->b_blocknr;
264 __entry->ino = bd->bd_gl->gl_name.ln_number;
265 ),
266
267 TP_printk("%u,%u log %s %llu/%lu inode %llu",
268 MAJOR(__entry->dev), MINOR(__entry->dev),
269 __entry->pin ? "pin" : "unpin",
270 (unsigned long long)__entry->block,
271 (unsigned long)__entry->len,
272 (unsigned long long)__entry->ino)
273);
274
275/* Flushing the log */
276TRACE_EVENT(gfs2_log_flush,
277
278 TP_PROTO(const struct gfs2_sbd *sdp, int start),
279
280 TP_ARGS(sdp, start),
281
282 TP_STRUCT__entry(
283 __field( dev_t, dev )
284 __field( int, start )
285 __field( u64, log_seq )
286 ),
287
288 TP_fast_assign(
289 __entry->dev = sdp->sd_vfs->s_dev;
290 __entry->start = start;
291 __entry->log_seq = sdp->sd_log_sequence;
292 ),
293
294 TP_printk("%u,%u log flush %s %llu",
295 MAJOR(__entry->dev), MINOR(__entry->dev),
296 __entry->start ? "start" : "end",
297 (unsigned long long)__entry->log_seq)
298);
299
300/* Reserving/releasing blocks in the log */
301TRACE_EVENT(gfs2_log_blocks,
302
303 TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
304
305 TP_ARGS(sdp, blocks),
306
307 TP_STRUCT__entry(
308 __field( dev_t, dev )
309 __field( int, blocks )
310 ),
311
312 TP_fast_assign(
313 __entry->dev = sdp->sd_vfs->s_dev;
314 __entry->blocks = blocks;
315 ),
316
317 TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
318 MINOR(__entry->dev), __entry->blocks)
319);
320
321/* Section 3 - bmap
322 *
323 * Objectives:
324 * Latency: Bmap request time
325 * Performance: Block allocator tracing
 326 * Correctness: Test of discard generation vs. blocks allocated
327 */
328
329/* Map an extent of blocks, possibly a new allocation */
330TRACE_EVENT(gfs2_bmap,
331
332 TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
333 sector_t lblock, int create, int errno),
334
335 TP_ARGS(ip, bh, lblock, create, errno),
336
337 TP_STRUCT__entry(
338 __field( dev_t, dev )
339 __field( sector_t, lblock )
340 __field( sector_t, pblock )
341 __field( u64, inum )
342 __field( unsigned long, state )
343 __field( u32, len )
344 __field( int, create )
345 __field( int, errno )
346 ),
347
348 TP_fast_assign(
349 __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
350 __entry->lblock = lblock;
351 __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
352 __entry->inum = ip->i_no_addr;
353 __entry->state = bh->b_state;
354 __entry->len = bh->b_size;
355 __entry->create = create;
356 __entry->errno = errno;
357 ),
358
359 TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
360 MAJOR(__entry->dev), MINOR(__entry->dev),
361 (unsigned long long)__entry->inum,
362 (unsigned long long)__entry->lblock,
363 (unsigned long)__entry->len,
364 (unsigned long long)__entry->pblock,
365 __entry->state, __entry->create ? "create " : "nocreate",
366 __entry->errno)
367);
368
369/* Keep track of blocks as they are allocated/freed */
370TRACE_EVENT(gfs2_block_alloc,
371
372 TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
373 u8 block_state),
374
375 TP_ARGS(ip, block, len, block_state),
376
377 TP_STRUCT__entry(
378 __field( dev_t, dev )
379 __field( u64, start )
380 __field( u64, inum )
381 __field( u32, len )
382 __field( u8, block_state )
383 ),
384
385 TP_fast_assign(
386 __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
387 __entry->start = block;
388 __entry->inum = ip->i_no_addr;
389 __entry->len = len;
390 __entry->block_state = block_state;
391 ),
392
393 TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
394 MAJOR(__entry->dev), MINOR(__entry->dev),
395 (unsigned long long)__entry->inum,
396 (unsigned long long)__entry->start,
397 (unsigned long)__entry->len,
398 block_state_name(__entry->block_state))
399);
400
401#endif /* _TRACE_GFS2_H */
402
403/* This part must be outside protection */
404#undef TRACE_INCLUDE_PATH
405#define TRACE_INCLUDE_PATH .
406#include <trace/define_trace.h>
407
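The header above follows the kernel's standard TRACE_EVENT pattern: files that fire events just include trace_gfs2.h, and exactly one translation unit in the module defines CREATE_TRACE_POINTS before the include so the TP_* macros expand into real tracepoint bodies (which is what the TRACE_INCLUDE_PATH boilerplate at the bottom of the file supports). A minimal sketch of that instantiation and of a call site follows; which gfs2 .c file actually carries the CREATE_TRACE_POINTS define is not visible in this excerpt:

	/*
	 * In exactly one .c file of the module, before including the
	 * header -- expands the event bodies exactly once.
	 */
	#define CREATE_TRACE_POINTS
	#include "trace_gfs2.h"

	static void log_reserve_example(struct gfs2_sbd *sdp, unsigned int blks)
	{
		/* does nothing unless the gfs2:gfs2_log_blocks event is enabled */
		trace_gfs2_log_blocks(sdp, -(int)blks);
	}

At runtime the new events are switched on through debugfs, e.g.
	echo 1 > /sys/kernel/debug/tracing/events/gfs2/enable
and their output appears in /sys/kernel/debug/tracing/trace.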
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0af36085eb28..1a9c7878f864 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -556,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
556 556
557 /* add partitions */ 557 /* add partitions */
558 for (p = 1; p < state->limit; p++) { 558 for (p = 1; p < state->limit; p++) {
559 sector_t size = state->parts[p].size; 559 sector_t size, from;
560 sector_t from = state->parts[p].from; 560try_scan:
561 size = state->parts[p].size;
561 if (!size) 562 if (!size)
562 continue; 563 continue;
564
565 from = state->parts[p].from;
563 if (from >= get_capacity(disk)) { 566 if (from >= get_capacity(disk)) {
564 printk(KERN_WARNING 567 printk(KERN_WARNING
565 "%s: p%d ignored, start %llu is behind the end of the disk\n", 568 "%s: p%d ignored, start %llu is behind the end of the disk\n",
566 disk->disk_name, p, (unsigned long long) from); 569 disk->disk_name, p, (unsigned long long) from);
567 continue; 570 continue;
568 } 571 }
572
569 if (from + size > get_capacity(disk)) { 573 if (from + size > get_capacity(disk)) {
570 /* 574 struct block_device_operations *bdops = disk->fops;
571 * we can not ignore partitions of broken tables 575 unsigned long long capacity;
572 * created by for example camera firmware, but we 576
573 * limit them to the end of the disk to avoid
574 * creating invalid block devices
575 */
576 printk(KERN_WARNING 577 printk(KERN_WARNING
577 "%s: p%d size %llu limited to end of disk\n", 578 "%s: p%d size %llu exceeds device capacity, ",
578 disk->disk_name, p, (unsigned long long) size); 579 disk->disk_name, p, (unsigned long long) size);
579 size = get_capacity(disk) - from; 580
581 if (bdops->set_capacity &&
582 (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
583 printk(KERN_CONT "enabling native capacity\n");
584 capacity = bdops->set_capacity(disk, ~0ULL);
585 disk->flags |= GENHD_FL_NATIVE_CAPACITY;
586 if (capacity > get_capacity(disk)) {
587 set_capacity(disk, capacity);
588 check_disk_size_change(disk, bdev);
589 bdev->bd_invalidated = 0;
590 }
591 goto try_scan;
592 } else {
593 /*
 594 * we cannot ignore partitions of broken tables
 595 * created by, for example, camera firmware, but
596 * we limit them to the end of the disk to avoid
597 * creating invalid block devices
598 */
599 printk(KERN_CONT "limited to end of disk\n");
600 size = get_capacity(disk) - from;
601 }
580 } 602 }
581 part = add_partition(disk, p, from, size, 603 part = add_partition(disk, p, from, size,
582 state->parts[p].flags); 604 state->parts[p].flags);
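The net change to the rescan loop: instead of immediately clamping an oversized partition, the kernel first asks the driver to unlock the device's native capacity (once per disk, guarded by GENHD_FL_NATIVE_CAPACITY) and re-examines the same slot via the try_scan label; the old clamp-to-end-of-disk behaviour remains the fallback. A distilled paraphrase of the control flow, with error handling and the size-change notification elided:

	for (p = 1; p < state->limit; p++) {
	try_scan:
		size = state->parts[p].size;
		if (!size)
			continue;
		from = state->parts[p].from;
		if (from + size > get_capacity(disk)) {
			if (disk->fops->set_capacity &&
			    !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
				/* unlock full native size once, then retry this slot */
				set_capacity(disk, disk->fops->set_capacity(disk, ~0ULL));
				disk->flags |= GENHD_FL_NATIVE_CAPACITY;
				goto try_scan;
			}
			size = get_capacity(disk) - from;	/* clamp, as before */
		}
		add_partition(disk, p, from, size, state->parts[p].flags);
	}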
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5899cd..480f28127f09 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
39config XFS_POSIX_ACL 39config XFS_POSIX_ACL
40 bool "XFS POSIX ACL support" 40 bool "XFS POSIX ACL support"
41 depends on XFS_FS 41 depends on XFS_FS
42 select FS_POSIX_ACL
42 help 43 help
43 POSIX Access Control Lists (ACLs) support permissions for users and 44 POSIX Access Control Lists (ACLs) support permissions for users and
44 groups beyond the owner/group/world scheme. 45 groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e47fe9..7a59daed1782 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
40endif 40endif
41 41
42xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 42xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
43xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 43xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o
44xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o 44xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
45xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o 45xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
46xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o 46xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
@@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \
88 xfs_utils.o \ 88 xfs_utils.o \
89 xfs_vnodeops.o \ 89 xfs_vnodeops.o \
90 xfs_rw.o \ 90 xfs_rw.o \
91 xfs_dmops.o \ 91 xfs_dmops.o
92 xfs_qmops.o
93 92
94xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ 93xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
95 xfs_dir2_trace.o 94 xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 000000000000..1e9d1246eebc
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
1/*
2 * Copyright (c) 2008, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_acl.h"
20#include "xfs_attr.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h"
23#include "xfs_vnodeops.h"
24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h>
26
27
28#define XFS_ACL_NOT_CACHED ((void *)-1)
29
30/*
31 * Locking scheme:
32 * - all ACL updates are protected by inode->i_mutex, which is taken before
33 * calling into this file.
34 * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
35 * protected by inode->i_lock.
36 */
37
38STATIC struct posix_acl *
39xfs_acl_from_disk(struct xfs_acl *aclp)
40{
41 struct posix_acl_entry *acl_e;
42 struct posix_acl *acl;
43 struct xfs_acl_entry *ace;
44 int count, i;
45
46 count = be32_to_cpu(aclp->acl_cnt);
47
48 acl = posix_acl_alloc(count, GFP_KERNEL);
49 if (!acl)
50 return ERR_PTR(-ENOMEM);
51
52 for (i = 0; i < count; i++) {
53 acl_e = &acl->a_entries[i];
54 ace = &aclp->acl_entry[i];
55
56 /*
57 * The tag is 32 bits on disk and 16 bits in core.
58 *
59 * Because every access to it goes through the core
60 * format first this is not a problem.
61 */
62 acl_e->e_tag = be32_to_cpu(ace->ae_tag);
63 acl_e->e_perm = be16_to_cpu(ace->ae_perm);
64
65 switch (acl_e->e_tag) {
66 case ACL_USER:
67 case ACL_GROUP:
68 acl_e->e_id = be32_to_cpu(ace->ae_id);
69 break;
70 case ACL_USER_OBJ:
71 case ACL_GROUP_OBJ:
72 case ACL_MASK:
73 case ACL_OTHER:
74 acl_e->e_id = ACL_UNDEFINED_ID;
75 break;
76 default:
77 goto fail;
78 }
79 }
80 return acl;
81
82fail:
83 posix_acl_release(acl);
84 return ERR_PTR(-EINVAL);
85}
86
87STATIC void
88xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
89{
90 const struct posix_acl_entry *acl_e;
91 struct xfs_acl_entry *ace;
92 int i;
93
94 aclp->acl_cnt = cpu_to_be32(acl->a_count);
95 for (i = 0; i < acl->a_count; i++) {
96 ace = &aclp->acl_entry[i];
97 acl_e = &acl->a_entries[i];
98
99 ace->ae_tag = cpu_to_be32(acl_e->e_tag);
100 ace->ae_id = cpu_to_be32(acl_e->e_id);
101 ace->ae_perm = cpu_to_be16(acl_e->e_perm);
102 }
103}
104
105/*
106 * Update the cached ACL pointer in the inode.
107 *
108 * Because we don't hold any locks while reading/writing the attribute
109 * from/to disk another thread could have raced and updated the cached
110 * ACL value before us. In that case we release the previous cached value
111 * and update it with our new value.
112 */
113STATIC void
114xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
115 struct posix_acl *acl)
116{
117 spin_lock(&inode->i_lock);
118 if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
119 posix_acl_release(*p_acl);
120 *p_acl = posix_acl_dup(acl);
121 spin_unlock(&inode->i_lock);
122}
123
124struct posix_acl *
125xfs_get_acl(struct inode *inode, int type)
126{
127 struct xfs_inode *ip = XFS_I(inode);
128 struct posix_acl *acl = NULL, **p_acl;
129 struct xfs_acl *xfs_acl;
130 int len = sizeof(struct xfs_acl);
131 char *ea_name;
132 int error;
133
134 switch (type) {
135 case ACL_TYPE_ACCESS:
136 ea_name = SGI_ACL_FILE;
137 p_acl = &ip->i_acl;
138 break;
139 case ACL_TYPE_DEFAULT:
140 ea_name = SGI_ACL_DEFAULT;
141 p_acl = &ip->i_default_acl;
142 break;
143 default:
144 return ERR_PTR(-EINVAL);
145 }
146
147 spin_lock(&inode->i_lock);
148 if (*p_acl != XFS_ACL_NOT_CACHED)
149 acl = posix_acl_dup(*p_acl);
150 spin_unlock(&inode->i_lock);
151
152 /*
 153 * If we have a cached ACL value just return it, no need to
154 * go out to the disk.
155 */
156 if (acl)
157 return acl;
158
159 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
160 if (!xfs_acl)
161 return ERR_PTR(-ENOMEM);
162
163 error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
164 if (error) {
165 /*
 166 * If the attribute doesn't exist, make sure we have a negative
 167 * cache entry; for any other error assume it is transient and
168 * leave the cache entry as XFS_ACL_NOT_CACHED.
169 */
170 if (error == -ENOATTR) {
171 acl = NULL;
172 goto out_update_cache;
173 }
174 goto out;
175 }
176
177 acl = xfs_acl_from_disk(xfs_acl);
178 if (IS_ERR(acl))
179 goto out;
180
181 out_update_cache:
182 xfs_update_cached_acl(inode, p_acl, acl);
183 out:
184 kfree(xfs_acl);
185 return acl;
186}
187
188STATIC int
189xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
190{
191 struct xfs_inode *ip = XFS_I(inode);
192 struct posix_acl **p_acl;
193 char *ea_name;
194 int error;
195
196 if (S_ISLNK(inode->i_mode))
197 return -EOPNOTSUPP;
198
199 switch (type) {
200 case ACL_TYPE_ACCESS:
201 ea_name = SGI_ACL_FILE;
202 p_acl = &ip->i_acl;
203 break;
204 case ACL_TYPE_DEFAULT:
205 if (!S_ISDIR(inode->i_mode))
206 return acl ? -EACCES : 0;
207 ea_name = SGI_ACL_DEFAULT;
208 p_acl = &ip->i_default_acl;
209 break;
210 default:
211 return -EINVAL;
212 }
213
214 if (acl) {
215 struct xfs_acl *xfs_acl;
216 int len;
217
218 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
219 if (!xfs_acl)
220 return -ENOMEM;
221
222 xfs_acl_to_disk(xfs_acl, acl);
223 len = sizeof(struct xfs_acl) -
224 (sizeof(struct xfs_acl_entry) *
225 (XFS_ACL_MAX_ENTRIES - acl->a_count));
226
227 error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
228 len, ATTR_ROOT);
229
230 kfree(xfs_acl);
231 } else {
232 /*
233 * A NULL ACL argument means we want to remove the ACL.
234 */
235 error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
236
237 /*
238 * If the attribute didn't exist to start with that's fine.
239 */
240 if (error == -ENOATTR)
241 error = 0;
242 }
243
244 if (!error)
245 xfs_update_cached_acl(inode, p_acl, acl);
246 return error;
247}
248
249int
250xfs_check_acl(struct inode *inode, int mask)
251{
252 struct xfs_inode *ip = XFS_I(inode);
253 struct posix_acl *acl;
254 int error = -EAGAIN;
255
256 xfs_itrace_entry(ip);
257
258 /*
259 * If there is no attribute fork no ACL exists on this inode and
260 * we can skip the whole exercise.
261 */
262 if (!XFS_IFORK_Q(ip))
263 return -EAGAIN;
264
265 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
266 if (IS_ERR(acl))
267 return PTR_ERR(acl);
268 if (acl) {
269 error = posix_acl_permission(inode, acl, mask);
270 posix_acl_release(acl);
271 }
272
273 return error;
274}
275
276static int
277xfs_set_mode(struct inode *inode, mode_t mode)
278{
279 int error = 0;
280
281 if (mode != inode->i_mode) {
282 struct iattr iattr;
283
284 iattr.ia_valid = ATTR_MODE;
285 iattr.ia_mode = mode;
286
287 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
288 }
289
290 return error;
291}
292
293static int
294xfs_acl_exists(struct inode *inode, char *name)
295{
296 int len = sizeof(struct xfs_acl);
297
298 return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
299 ATTR_ROOT|ATTR_KERNOVAL) == 0);
300}
301
302int
303posix_acl_access_exists(struct inode *inode)
304{
305 return xfs_acl_exists(inode, SGI_ACL_FILE);
306}
307
308int
309posix_acl_default_exists(struct inode *inode)
310{
311 if (!S_ISDIR(inode->i_mode))
312 return 0;
313 return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
314}
315
316/*
317 * No need for i_mutex because the inode is not yet exposed to the VFS.
318 */
319int
320xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
321{
322 struct posix_acl *clone;
323 mode_t mode;
324 int error = 0, inherit = 0;
325
326 if (S_ISDIR(inode->i_mode)) {
327 error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
328 if (error)
329 return error;
330 }
331
332 clone = posix_acl_clone(default_acl, GFP_KERNEL);
333 if (!clone)
334 return -ENOMEM;
335
336 mode = inode->i_mode;
337 error = posix_acl_create_masq(clone, &mode);
338 if (error < 0)
339 goto out_release_clone;
340
341 /*
342 * If posix_acl_create_masq returns a positive value we need to
343 * inherit a permission that can't be represented using the Unix
344 * mode bits and we actually need to set an ACL.
345 */
346 if (error > 0)
347 inherit = 1;
348
349 error = xfs_set_mode(inode, mode);
350 if (error)
351 goto out_release_clone;
352
353 if (inherit)
354 error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
355
356 out_release_clone:
357 posix_acl_release(clone);
358 return error;
359}
360
361int
362xfs_acl_chmod(struct inode *inode)
363{
364 struct posix_acl *acl, *clone;
365 int error;
366
367 if (S_ISLNK(inode->i_mode))
368 return -EOPNOTSUPP;
369
370 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
371 if (IS_ERR(acl) || !acl)
372 return PTR_ERR(acl);
373
374 clone = posix_acl_clone(acl, GFP_KERNEL);
375 posix_acl_release(acl);
376 if (!clone)
377 return -ENOMEM;
378
379 error = posix_acl_chmod_masq(clone, inode->i_mode);
380 if (!error)
381 error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
382
383 posix_acl_release(clone);
384 return error;
385}
386
387void
388xfs_inode_init_acls(struct xfs_inode *ip)
389{
390 /*
391 * No need for locking, inode is not live yet.
392 */
393 ip->i_acl = XFS_ACL_NOT_CACHED;
394 ip->i_default_acl = XFS_ACL_NOT_CACHED;
395}
396
397void
398xfs_inode_clear_acls(struct xfs_inode *ip)
399{
400 /*
401 * No need for locking here, the inode is not live anymore
402 * and just about to be freed.
403 */
404 if (ip->i_acl != XFS_ACL_NOT_CACHED)
405 posix_acl_release(ip->i_acl);
406 if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
407 posix_acl_release(ip->i_default_acl);
408}
409
410
411/*
412 * System xattr handlers.
413 *
414 * Currently Posix ACLs are the only system namespace extended attribute
415 * handlers supported by XFS, so we just implement the handlers here.
416 * If we ever support other system extended attributes this will need
417 * some refactoring.
418 */
419
420static int
421xfs_decode_acl(const char *name)
422{
423 if (strcmp(name, "posix_acl_access") == 0)
424 return ACL_TYPE_ACCESS;
425 else if (strcmp(name, "posix_acl_default") == 0)
426 return ACL_TYPE_DEFAULT;
427 return -EINVAL;
428}
429
430static int
431xfs_xattr_system_get(struct inode *inode, const char *name,
432 void *value, size_t size)
433{
434 struct posix_acl *acl;
435 int type, error;
436
437 type = xfs_decode_acl(name);
438 if (type < 0)
439 return type;
440
441 acl = xfs_get_acl(inode, type);
442 if (IS_ERR(acl))
443 return PTR_ERR(acl);
444 if (acl == NULL)
445 return -ENODATA;
446
447 error = posix_acl_to_xattr(acl, value, size);
448 posix_acl_release(acl);
449
450 return error;
451}
452
453static int
454xfs_xattr_system_set(struct inode *inode, const char *name,
455 const void *value, size_t size, int flags)
456{
457 struct posix_acl *acl = NULL;
458 int error = 0, type;
459
460 type = xfs_decode_acl(name);
461 if (type < 0)
462 return type;
463 if (flags & XATTR_CREATE)
464 return -EINVAL;
465 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
466 return value ? -EACCES : 0;
467 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
468 return -EPERM;
469
470 if (!value)
471 goto set_acl;
472
473 acl = posix_acl_from_xattr(value, size);
474 if (!acl) {
475 /*
476 * acl_set_file(3) may request that we set default ACLs with
477 * zero length -- defend (gracefully) against that here.
478 */
479 goto out;
480 }
481 if (IS_ERR(acl)) {
482 error = PTR_ERR(acl);
483 goto out;
484 }
485
486 error = posix_acl_valid(acl);
487 if (error)
488 goto out_release;
489
490 error = -EINVAL;
491 if (acl->a_count > XFS_ACL_MAX_ENTRIES)
492 goto out_release;
493
494 if (type == ACL_TYPE_ACCESS) {
495 mode_t mode = inode->i_mode;
496 error = posix_acl_equiv_mode(acl, &mode);
497
498 if (error <= 0) {
499 posix_acl_release(acl);
500 acl = NULL;
501
502 if (error < 0)
503 return error;
504 }
505
506 error = xfs_set_mode(inode, mode);
507 if (error)
508 goto out_release;
509 }
510
511 set_acl:
512 error = xfs_set_acl(inode, type, acl);
513 out_release:
514 posix_acl_release(acl);
515 out:
516 return error;
517}
518
519struct xattr_handler xfs_xattr_system_handler = {
520 .prefix = XATTR_SYSTEM_PREFIX,
521 .get = xfs_xattr_system_get,
522 .set = xfs_xattr_system_set,
523};
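One detail worth spelling out in the file above: the sentinel XFS_ACL_NOT_CACHED ((void *)-1) lets a cached NULL mean 'read from disk, no ACL exists', distinct from 'never looked up', and readers hold inode->i_lock only long enough to take a reference on whatever is cached. A sketch of the read fast path as implemented by xfs_get_acl() (an illustration of the pattern, not additional patch code); since posix_acl_dup(NULL) simply returns NULL, only a positive cache hit short-circuits the disk read:

	/*
	 * ip->i_acl holds one of three values:
	 *   XFS_ACL_NOT_CACHED ((void *)-1)  -- not read from disk yet
	 *   NULL                             -- read from disk, no ACL exists
	 *   valid pointer                    -- cached struct posix_acl
	 */
	struct posix_acl *acl = NULL;

	spin_lock(&inode->i_lock);
	if (ip->i_acl != XFS_ACL_NOT_CACHED)
		acl = posix_acl_dup(ip->i_acl);	/* dup of NULL stays NULL */
	spin_unlock(&inode->i_lock);

	if (acl)	/* positive hit: done without touching disk */
		return acl;
	/* otherwise fall through to xfs_attr_get() + xfs_update_cached_acl() */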
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 34eaab608e6e..5bb523d7f37e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
41#include "xfs_itable.h" 41#include "xfs_itable.h"
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_rw.h" 43#include "xfs_rw.h"
44#include "xfs_acl.h"
45#include "xfs_attr.h" 44#include "xfs_attr.h"
46#include "xfs_bmap.h" 45#include "xfs_bmap.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
@@ -899,7 +898,8 @@ xfs_ioctl_setattr(
899 struct xfs_mount *mp = ip->i_mount; 898 struct xfs_mount *mp = ip->i_mount;
900 struct xfs_trans *tp; 899 struct xfs_trans *tp;
901 unsigned int lock_flags = 0; 900 unsigned int lock_flags = 0;
902 struct xfs_dquot *udqp = NULL, *gdqp = NULL; 901 struct xfs_dquot *udqp = NULL;
902 struct xfs_dquot *gdqp = NULL;
903 struct xfs_dquot *olddquot = NULL; 903 struct xfs_dquot *olddquot = NULL;
904 int code; 904 int code;
905 905
@@ -919,7 +919,7 @@ xfs_ioctl_setattr(
919 * because the i_*dquot fields will get updated anyway. 919 * because the i_*dquot fields will get updated anyway.
920 */ 920 */
921 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { 921 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
922 code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, 922 code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
923 ip->i_d.di_gid, fa->fsx_projid, 923 ip->i_d.di_gid, fa->fsx_projid,
924 XFS_QMOPT_PQUOTA, &udqp, &gdqp); 924 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
925 if (code) 925 if (code)
@@ -954,10 +954,11 @@ xfs_ioctl_setattr(
954 * Do a quota reservation only if projid is actually going to change. 954 * Do a quota reservation only if projid is actually going to change.
955 */ 955 */
956 if (mask & FSX_PROJID) { 956 if (mask & FSX_PROJID) {
957 if (XFS_IS_PQUOTA_ON(mp) && 957 if (XFS_IS_QUOTA_RUNNING(mp) &&
958 XFS_IS_PQUOTA_ON(mp) &&
958 ip->i_d.di_projid != fa->fsx_projid) { 959 ip->i_d.di_projid != fa->fsx_projid) {
959 ASSERT(tp); 960 ASSERT(tp);
960 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 961 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
961 capable(CAP_FOWNER) ? 962 capable(CAP_FOWNER) ?
962 XFS_QMOPT_FORCE_RES : 0); 963 XFS_QMOPT_FORCE_RES : 0);
963 if (code) /* out of quota */ 964 if (code) /* out of quota */
@@ -1059,8 +1060,8 @@ xfs_ioctl_setattr(
1059 * in the transaction. 1060 * in the transaction.
1060 */ 1061 */
1061 if (ip->i_d.di_projid != fa->fsx_projid) { 1062 if (ip->i_d.di_projid != fa->fsx_projid) {
1062 if (XFS_IS_PQUOTA_ON(mp)) { 1063 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
1063 olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, 1064 olddquot = xfs_qm_vop_chown(tp, ip,
1064 &ip->i_gdquot, gdqp); 1065 &ip->i_gdquot, gdqp);
1065 } 1066 }
1066 ip->i_d.di_projid = fa->fsx_projid; 1067 ip->i_d.di_projid = fa->fsx_projid;
@@ -1106,9 +1107,9 @@ xfs_ioctl_setattr(
1106 /* 1107 /*
1107 * Release any dquot(s) the inode had kept before chown. 1108 * Release any dquot(s) the inode had kept before chown.
1108 */ 1109 */
1109 XFS_QM_DQRELE(mp, olddquot); 1110 xfs_qm_dqrele(olddquot);
1110 XFS_QM_DQRELE(mp, udqp); 1111 xfs_qm_dqrele(udqp);
1111 XFS_QM_DQRELE(mp, gdqp); 1112 xfs_qm_dqrele(gdqp);
1112 1113
1113 if (code) 1114 if (code)
1114 return code; 1115 return code;
@@ -1122,8 +1123,8 @@ xfs_ioctl_setattr(
1122 return 0; 1123 return 0;
1123 1124
1124 error_return: 1125 error_return:
1125 XFS_QM_DQRELE(mp, udqp); 1126 xfs_qm_dqrele(udqp);
1126 XFS_QM_DQRELE(mp, gdqp); 1127 xfs_qm_dqrele(gdqp);
1127 xfs_trans_cancel(tp, 0); 1128 xfs_trans_cancel(tp, 0);
1128 if (lock_flags) 1129 if (lock_flags)
1129 xfs_iunlock(ip, lock_flags); 1130 xfs_iunlock(ip, lock_flags);
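The XFS_QM_* conversions above are the caller-side half of removing the quota ops vector: call sites now invoke xfs_qm_* functions directly. For those calls to build on kernels without CONFIG_XFS_QUOTA, xfs_quota.h has to provide no-op fallbacks, roughly along these lines (an illustration of the idea only; the real stubs live in the xfs_quota.h hunk outside this excerpt and may differ in detail):

	#ifdef CONFIG_XFS_QUOTA
	extern void xfs_qm_dqrele(struct xfs_dquot *dqp);
	#else
	# define xfs_qm_dqrele(dqp)		do { } while (0)
	#endif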
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6075382336d7..58973bb46038 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h" 19#include "xfs_fs.h"
20#include "xfs_acl.h"
20#include "xfs_bit.h" 21#include "xfs_bit.h"
21#include "xfs_log.h" 22#include "xfs_log.h"
22#include "xfs_inum.h" 23#include "xfs_inum.h"
@@ -51,6 +52,7 @@
51#include <linux/capability.h> 52#include <linux/capability.h>
52#include <linux/xattr.h> 53#include <linux/xattr.h>
53#include <linux/namei.h> 54#include <linux/namei.h>
55#include <linux/posix_acl.h>
54#include <linux/security.h> 56#include <linux/security.h>
55#include <linux/falloc.h> 57#include <linux/falloc.h>
56#include <linux/fiemap.h> 58#include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
202{ 204{
203 struct inode *inode; 205 struct inode *inode;
204 struct xfs_inode *ip = NULL; 206 struct xfs_inode *ip = NULL;
205 xfs_acl_t *default_acl = NULL; 207 struct posix_acl *default_acl = NULL;
206 struct xfs_name name; 208 struct xfs_name name;
207 int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
208 int error; 209 int error;
209 210
210 /* 211 /*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
219 rdev = 0; 220 rdev = 0;
220 } 221 }
221 222
222 if (test_default_acl && test_default_acl(dir)) { 223 if (IS_POSIXACL(dir)) {
223 if (!_ACL_ALLOC(default_acl)) { 224 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
224 return -ENOMEM; 225 if (IS_ERR(default_acl))
225 } 226 return -PTR_ERR(default_acl);
226 if (!_ACL_GET_DEFAULT(dir, default_acl)) {
227 _ACL_FREE(default_acl);
228 default_acl = NULL;
229 }
230 }
231 227
232 if (IS_POSIXACL(dir) && !default_acl) 228 if (!default_acl)
233 mode &= ~current_umask(); 229 mode &= ~current_umask();
230 }
234 231
235 xfs_dentry_to_name(&name, dentry); 232 xfs_dentry_to_name(&name, dentry);
236 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); 233 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
244 goto out_cleanup_inode; 241 goto out_cleanup_inode;
245 242
246 if (default_acl) { 243 if (default_acl) {
247 error = _ACL_INHERIT(inode, mode, default_acl); 244 error = -xfs_inherit_acl(inode, default_acl);
248 if (unlikely(error)) 245 if (unlikely(error))
249 goto out_cleanup_inode; 246 goto out_cleanup_inode;
250 _ACL_FREE(default_acl); 247 posix_acl_release(default_acl);
251 } 248 }
252 249
253 250
@@ -257,8 +254,7 @@ xfs_vn_mknod(
257 out_cleanup_inode: 254 out_cleanup_inode:
258 xfs_cleanup_inode(dir, inode, dentry); 255 xfs_cleanup_inode(dir, inode, dentry);
259 out_free_acl: 256 out_free_acl:
260 if (default_acl) 257 posix_acl_release(default_acl);
261 _ACL_FREE(default_acl);
262 return -error; 258 return -error;
263} 259}
264 260
@@ -488,26 +484,6 @@ xfs_vn_put_link(
488 kfree(s); 484 kfree(s);
489} 485}
490 486
491#ifdef CONFIG_XFS_POSIX_ACL
492STATIC int
493xfs_check_acl(
494 struct inode *inode,
495 int mask)
496{
497 struct xfs_inode *ip = XFS_I(inode);
498 int error;
499
500 xfs_itrace_entry(ip);
501
502 if (XFS_IFORK_Q(ip)) {
503 error = xfs_acl_iaccess(ip, mask, NULL);
504 if (error != -1)
505 return -error;
506 }
507
508 return -EAGAIN;
509}
510
511STATIC int 487STATIC int
512xfs_vn_permission( 488xfs_vn_permission(
513 struct inode *inode, 489 struct inode *inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
515{ 491{
516 return generic_permission(inode, mask, xfs_check_acl); 492 return generic_permission(inode, mask, xfs_check_acl);
517} 493}
518#else
519#define xfs_vn_permission NULL
520#endif
521 494
522STATIC int 495STATIC int
523xfs_vn_getattr( 496xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192ccbe6..7078974a6eee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_inode_item.h" 46#include "xfs_inode_item.h"
48#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a633d3d9..cb6e2cca214f 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
50{ 50{
51 struct xfs_mount *mp = XFS_M(sb); 51 struct xfs_mount *mp = XFS_M(sb);
52 52
53 if (sb->s_flags & MS_RDONLY)
54 return -EROFS;
53 if (!XFS_IS_QUOTA_RUNNING(mp)) 55 if (!XFS_IS_QUOTA_RUNNING(mp))
54 return -ENOSYS; 56 return -ENOSYS;
55 return -xfs_sync_inodes(mp, SYNC_DELWRI); 57 return -xfs_sync_data(mp, 0);
56} 58}
57 59
58STATIC int 60STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 08d6bd9a3947..2e09efbca8db 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_fsops.h" 44#include "xfs_fsops.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_acl.h"
47#include "xfs_attr.h" 46#include "xfs_attr.h"
48#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
49#include "xfs_utils.h" 48#include "xfs_utils.h"
@@ -405,6 +404,14 @@ xfs_parseargs(
405 return EINVAL; 404 return EINVAL;
406 } 405 }
407 406
407#ifndef CONFIG_XFS_QUOTA
408 if (XFS_IS_QUOTA_RUNNING(mp)) {
409 cmn_err(CE_WARN,
410 "XFS: quota support not available in this kernel.");
411 return EINVAL;
412 }
413#endif
414
408 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 415 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
409 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { 416 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
410 cmn_err(CE_WARN, 417 cmn_err(CE_WARN,
@@ -1063,7 +1070,18 @@ xfs_fs_put_super(
1063 int unmount_event_flags = 0; 1070 int unmount_event_flags = 0;
1064 1071
1065 xfs_syncd_stop(mp); 1072 xfs_syncd_stop(mp);
1066 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); 1073
1074 if (!(sb->s_flags & MS_RDONLY)) {
1075 /*
1076 * XXX(hch): this should be SYNC_WAIT.
1077 *
1078 * Or more likely not needed at all because the VFS is already
 1079 * calling ->sync_fs after shutting down all filesystem
1080 * operations and just before calling ->put_super.
1081 */
1082 xfs_sync_data(mp, 0);
1083 xfs_sync_attr(mp, 0);
1084 }
1067 1085
1068#ifdef HAVE_DMAPI 1086#ifdef HAVE_DMAPI
1069 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1087 if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1098,7 +1116,6 @@ xfs_fs_put_super(
1098 xfs_freesb(mp); 1116 xfs_freesb(mp);
1099 xfs_icsb_destroy_counters(mp); 1117 xfs_icsb_destroy_counters(mp);
1100 xfs_close_devices(mp); 1118 xfs_close_devices(mp);
1101 xfs_qmops_put(mp);
1102 xfs_dmops_put(mp); 1119 xfs_dmops_put(mp);
1103 xfs_free_fsname(mp); 1120 xfs_free_fsname(mp);
1104 kfree(mp); 1121 kfree(mp);
@@ -1158,6 +1175,7 @@ xfs_fs_statfs(
1158{ 1175{
1159 struct xfs_mount *mp = XFS_M(dentry->d_sb); 1176 struct xfs_mount *mp = XFS_M(dentry->d_sb);
1160 xfs_sb_t *sbp = &mp->m_sb; 1177 xfs_sb_t *sbp = &mp->m_sb;
1178 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1161 __uint64_t fakeinos, id; 1179 __uint64_t fakeinos, id;
1162 xfs_extlen_t lsize; 1180 xfs_extlen_t lsize;
1163 1181
@@ -1186,7 +1204,10 @@ xfs_fs_statfs(
1186 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1204 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1187 spin_unlock(&mp->m_sb_lock); 1205 spin_unlock(&mp->m_sb_lock);
1188 1206
1189 XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); 1207 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
1208 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
1209 (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
1210 xfs_qm_statvfs(ip, statp);
1190 return 0; 1211 return 0;
1191} 1212}
1192 1213
@@ -1394,16 +1415,13 @@ xfs_fs_fill_super(
1394 error = xfs_dmops_get(mp); 1415 error = xfs_dmops_get(mp);
1395 if (error) 1416 if (error)
1396 goto out_free_fsname; 1417 goto out_free_fsname;
1397 error = xfs_qmops_get(mp);
1398 if (error)
1399 goto out_put_dmops;
1400 1418
1401 if (silent) 1419 if (silent)
1402 flags |= XFS_MFSI_QUIET; 1420 flags |= XFS_MFSI_QUIET;
1403 1421
1404 error = xfs_open_devices(mp); 1422 error = xfs_open_devices(mp);
1405 if (error) 1423 if (error)
1406 goto out_put_qmops; 1424 goto out_put_dmops;
1407 1425
1408 if (xfs_icsb_init_counters(mp)) 1426 if (xfs_icsb_init_counters(mp))
1409 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1427 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1471,8 +1489,6 @@ xfs_fs_fill_super(
1471 out_destroy_counters: 1489 out_destroy_counters:
1472 xfs_icsb_destroy_counters(mp); 1490 xfs_icsb_destroy_counters(mp);
1473 xfs_close_devices(mp); 1491 xfs_close_devices(mp);
1474 out_put_qmops:
1475 xfs_qmops_put(mp);
1476 out_put_dmops: 1492 out_put_dmops:
1477 xfs_dmops_put(mp); 1493 xfs_dmops_put(mp);
1478 out_free_fsname: 1494 out_free_fsname:
@@ -1706,18 +1722,8 @@ xfs_init_zones(void)
1706 if (!xfs_ili_zone) 1722 if (!xfs_ili_zone)
1707 goto out_destroy_inode_zone; 1723 goto out_destroy_inode_zone;
1708 1724
1709#ifdef CONFIG_XFS_POSIX_ACL
1710 xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
1711 if (!xfs_acl_zone)
1712 goto out_destroy_ili_zone;
1713#endif
1714
1715 return 0; 1725 return 0;
1716 1726
1717#ifdef CONFIG_XFS_POSIX_ACL
1718 out_destroy_ili_zone:
1719#endif
1720 kmem_zone_destroy(xfs_ili_zone);
1721 out_destroy_inode_zone: 1727 out_destroy_inode_zone:
1722 kmem_zone_destroy(xfs_inode_zone); 1728 kmem_zone_destroy(xfs_inode_zone);
1723 out_destroy_efi_zone: 1729 out_destroy_efi_zone:
@@ -1751,9 +1757,6 @@ xfs_init_zones(void)
1751STATIC void 1757STATIC void
1752xfs_destroy_zones(void) 1758xfs_destroy_zones(void)
1753{ 1759{
1754#ifdef CONFIG_XFS_POSIX_ACL
1755 kmem_zone_destroy(xfs_acl_zone);
1756#endif
1757 kmem_zone_destroy(xfs_ili_zone); 1760 kmem_zone_destroy(xfs_ili_zone);
1758 kmem_zone_destroy(xfs_inode_zone); 1761 kmem_zone_destroy(xfs_inode_zone);
1759 kmem_zone_destroy(xfs_efi_zone); 1762 kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba76633c29..b619d6b8ca43 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,166 +43,267 @@
43#include "xfs_buf_item.h" 43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_quota.h"
46 47
47#include <linux/kthread.h> 48#include <linux/kthread.h>
48#include <linux/freezer.h> 49#include <linux/freezer.h>
49 50
50/*
51 * Sync all the inodes in the given AG according to the
52 * direction given by the flags.
53 */
54STATIC int
55xfs_sync_inodes_ag(
56 xfs_mount_t *mp,
57 int ag,
58 int flags)
59{
60 xfs_perag_t *pag = &mp->m_perag[ag];
61 int nr_found;
62 uint32_t first_index = 0;
63 int error = 0;
64 int last_error = 0;
65 51
66 do { 52STATIC xfs_inode_t *
67 struct inode *inode; 53xfs_inode_ag_lookup(
68 xfs_inode_t *ip = NULL; 54 struct xfs_mount *mp,
69 int lock_flags = XFS_ILOCK_SHARED; 55 struct xfs_perag *pag,
56 uint32_t *first_index,
57 int tag)
58{
59 int nr_found;
60 struct xfs_inode *ip;
70 61
71 /* 62 /*
72 * use a gang lookup to find the next inode in the tree 63 * use a gang lookup to find the next inode in the tree
73 * as the tree is sparse and a gang lookup walks to find 64 * as the tree is sparse and a gang lookup walks to find
74 * the number of objects requested. 65 * the number of objects requested.
75 */ 66 */
76 read_lock(&pag->pag_ici_lock); 67 read_lock(&pag->pag_ici_lock);
68 if (tag == XFS_ICI_NO_TAG) {
77 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
78 (void**)&ip, first_index, 1); 70 (void **)&ip, *first_index, 1);
71 } else {
72 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
73 (void **)&ip, *first_index, 1, tag);
74 }
75 if (!nr_found)
76 goto unlock;
79 77
80 if (!nr_found) { 78 /*
81 read_unlock(&pag->pag_ici_lock); 79 * Update the index for the next lookup. Catch overflows
82 break; 80 * into the next AG range which can occur if we have inodes
83 } 81 * in the last block of the AG and we are currently
82 * pointing to the last inode.
83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 goto unlock;
84 87
85 /* 88 return ip;
86 * Update the index for the next lookup. Catch overflows
87 * into the next AG range which can occur if we have inodes
88 * in the last block of the AG and we are currently
89 * pointing to the last inode.
90 */
91 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
92 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
93 read_unlock(&pag->pag_ici_lock);
94 break;
95 }
96 89
97 /* nothing to sync during shutdown */ 90unlock:
98 if (XFS_FORCED_SHUTDOWN(mp)) { 91 read_unlock(&pag->pag_ici_lock);
99 read_unlock(&pag->pag_ici_lock); 92 return NULL;
100 return 0; 93}
101 }
102 94
103 /* 95STATIC int
104 * If we can't get a reference on the inode, it must be 96xfs_inode_ag_walk(
105 * in reclaim. Leave it for the reclaim code to flush. 97 struct xfs_mount *mp,
106 */ 98 xfs_agnumber_t ag,
107 inode = VFS_I(ip); 99 int (*execute)(struct xfs_inode *ip,
108 if (!igrab(inode)) { 100 struct xfs_perag *pag, int flags),
109 read_unlock(&pag->pag_ici_lock); 101 int flags,
110 continue; 102 int tag)
111 } 103{
112 read_unlock(&pag->pag_ici_lock); 104 struct xfs_perag *pag = &mp->m_perag[ag];
105 uint32_t first_index;
106 int last_error = 0;
107 int skipped;
113 108
114 /* avoid new or bad inodes */ 109restart:
115 if (is_bad_inode(inode) || 110 skipped = 0;
116 xfs_iflags_test(ip, XFS_INEW)) { 111 first_index = 0;
117 IRELE(ip); 112 do {
118 continue; 113 int error = 0;
119 } 114 xfs_inode_t *ip;
120 115
121 /* 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
122 * If we have to flush data or wait for I/O completion 117 if (!ip)
123 * we need to hold the iolock. 118 break;
124 */
125 if (flags & SYNC_DELWRI) {
126 if (VN_DIRTY(inode)) {
127 if (flags & SYNC_TRYLOCK) {
128 if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
129 lock_flags |= XFS_IOLOCK_SHARED;
130 } else {
131 xfs_ilock(ip, XFS_IOLOCK_SHARED);
132 lock_flags |= XFS_IOLOCK_SHARED;
133 }
134 if (lock_flags & XFS_IOLOCK_SHARED) {
135 error = xfs_flush_pages(ip, 0, -1,
136 (flags & SYNC_WAIT) ? 0
137 : XFS_B_ASYNC,
138 FI_NONE);
139 }
140 }
141 if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
142 xfs_ioend_wait(ip);
143 }
144 xfs_ilock(ip, XFS_ILOCK_SHARED);
145
146 if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
147 if (flags & SYNC_WAIT) {
148 xfs_iflock(ip);
149 if (!xfs_inode_clean(ip))
150 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
151 else
152 xfs_ifunlock(ip);
153 } else if (xfs_iflock_nowait(ip)) {
154 if (!xfs_inode_clean(ip))
155 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
156 else
157 xfs_ifunlock(ip);
158 }
159 }
160 xfs_iput(ip, lock_flags);
161 119
120 error = execute(ip, pag, flags);
121 if (error == EAGAIN) {
122 skipped++;
123 continue;
124 }
162 if (error) 125 if (error)
163 last_error = error; 126 last_error = error;
164 /* 127 /*
165 * bail out if the filesystem is corrupted. 128 * bail out if the filesystem is corrupted.
166 */ 129 */
167 if (error == EFSCORRUPTED) 130 if (error == EFSCORRUPTED)
168 return XFS_ERROR(error); 131 break;
169 132
170 } while (nr_found); 133 } while (1);
134
135 if (skipped) {
136 delay(1);
137 goto restart;
138 }
171 139
140 xfs_put_perag(mp, pag);
172 return last_error; 141 return last_error;
173} 142}
174 143
175int 144int
176xfs_sync_inodes( 145xfs_inode_ag_iterator(
177 xfs_mount_t *mp, 146 struct xfs_mount *mp,
178 int flags) 147 int (*execute)(struct xfs_inode *ip,
148 struct xfs_perag *pag, int flags),
149 int flags,
150 int tag)
179{ 151{
180 int error; 152 int error = 0;
181 int last_error; 153 int last_error = 0;
182 int i; 154 xfs_agnumber_t ag;
183 int lflags = XFS_LOG_FORCE;
184 155
185 if (mp->m_flags & XFS_MOUNT_RDONLY) 156 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
186 return 0; 157 if (!mp->m_perag[ag].pag_ici_init)
187 error = 0; 158 continue;
188 last_error = 0; 159 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
160 if (error) {
161 last_error = error;
162 if (error == EFSCORRUPTED)
163 break;
164 }
165 }
166 return XFS_ERROR(last_error);
167}
168
169/* must be called with pag_ici_lock held and releases it */
170int
171xfs_sync_inode_valid(
172 struct xfs_inode *ip,
173 struct xfs_perag *pag)
174{
175 struct inode *inode = VFS_I(ip);
176
177 /* nothing to sync during shutdown */
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
179 read_unlock(&pag->pag_ici_lock);
180 return EFSCORRUPTED;
181 }
182
183 /*
184 * If we can't get a reference on the inode, it must be in reclaim.
185 * Leave it for the reclaim code to flush. Also avoid inodes that
186 * haven't been fully initialised.
187 */
188 if (!igrab(inode)) {
189 read_unlock(&pag->pag_ici_lock);
190 return ENOENT;
191 }
192 read_unlock(&pag->pag_ici_lock);
193
194 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
195 IRELE(ip);
196 return ENOENT;
197 }
198
199 return 0;
200}
201
202STATIC int
203xfs_sync_inode_data(
204 struct xfs_inode *ip,
205 struct xfs_perag *pag,
206 int flags)
207{
208 struct inode *inode = VFS_I(ip);
209 struct address_space *mapping = inode->i_mapping;
210 int error = 0;
211
212 error = xfs_sync_inode_valid(ip, pag);
213 if (error)
214 return error;
215
216 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
217 goto out_wait;
218
219 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
220 if (flags & SYNC_TRYLOCK)
221 goto out_wait;
222 xfs_ilock(ip, XFS_IOLOCK_SHARED);
223 }
224
225 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
226 0 : XFS_B_ASYNC, FI_NONE);
227 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
189 228
229 out_wait:
190 if (flags & SYNC_WAIT) 230 if (flags & SYNC_WAIT)
191 lflags |= XFS_LOG_SYNC; 231 xfs_ioend_wait(ip);
232 IRELE(ip);
233 return error;
234}
192 235
193 for (i = 0; i < mp->m_sb.sb_agcount; i++) { 236STATIC int
194 if (!mp->m_perag[i].pag_ici_init) 237xfs_sync_inode_attr(
195 continue; 238 struct xfs_inode *ip,
196 error = xfs_sync_inodes_ag(mp, i, flags); 239 struct xfs_perag *pag,
197 if (error) 240 int flags)
198 last_error = error; 241{
199 if (error == EFSCORRUPTED) 242 int error = 0;
200 break; 243
244 error = xfs_sync_inode_valid(ip, pag);
245 if (error)
246 return error;
247
248 xfs_ilock(ip, XFS_ILOCK_SHARED);
249 if (xfs_inode_clean(ip))
250 goto out_unlock;
251 if (!xfs_iflock_nowait(ip)) {
252 if (!(flags & SYNC_WAIT))
253 goto out_unlock;
254 xfs_iflock(ip);
201 } 255 }
202 if (flags & SYNC_DELWRI)
203 xfs_log_force(mp, 0, lflags);
204 256
205 return XFS_ERROR(last_error); 257 if (xfs_inode_clean(ip)) {
258 xfs_ifunlock(ip);
259 goto out_unlock;
260 }
261
262 error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
263 XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
264
265 out_unlock:
266 xfs_iunlock(ip, XFS_ILOCK_SHARED);
267 IRELE(ip);
268 return error;
269}
270
271/*
272 * Write out pagecache data for the whole filesystem.
273 */
274int
275xfs_sync_data(
276 struct xfs_mount *mp,
277 int flags)
278{
279 int error;
280
281 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
282
283 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
284 XFS_ICI_NO_TAG);
285 if (error)
286 return XFS_ERROR(error);
287
288 xfs_log_force(mp, 0,
289 (flags & SYNC_WAIT) ?
290 XFS_LOG_FORCE | XFS_LOG_SYNC :
291 XFS_LOG_FORCE);
292 return 0;
293}
294
295/*
296 * Write out inode metadata (attributes) for the whole filesystem.
297 */
298int
299xfs_sync_attr(
300 struct xfs_mount *mp,
301 int flags)
302{
303 ASSERT((flags & ~SYNC_WAIT) == 0);
304
305 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
306 XFS_ICI_NO_TAG);
206} 307}
207 308
208STATIC int 309STATIC int
@@ -252,7 +353,7 @@ xfs_sync_fsdata(
252 * If this is xfssyncd() then only sync the superblock if we can 353 * If this is xfssyncd() then only sync the superblock if we can
253 * lock it without sleeping and it is not pinned. 354 * lock it without sleeping and it is not pinned.
254 */ 355 */
255 if (flags & SYNC_BDFLUSH) { 356 if (flags & SYNC_TRYLOCK) {
256 ASSERT(!(flags & SYNC_WAIT)); 357 ASSERT(!(flags & SYNC_WAIT));
257 358
258 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); 359 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -316,13 +417,13 @@ xfs_quiesce_data(
316 int error; 417 int error;
317 418
318 /* push non-blocking */ 419 /* push non-blocking */
319 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); 420 xfs_sync_data(mp, 0);
320 XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); 421 xfs_qm_sync(mp, SYNC_TRYLOCK);
321 xfs_filestream_flush(mp); 422 xfs_filestream_flush(mp);
322 423
323 /* push and block */ 424 /* push and block */
324 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); 425 xfs_sync_data(mp, SYNC_WAIT);
325 XFS_QM_DQSYNC(mp, SYNC_WAIT); 426 xfs_qm_sync(mp, SYNC_WAIT);
326 427
327 /* write superblock and hoover up shutdown errors */ 428 /* write superblock and hoover up shutdown errors */
328 error = xfs_sync_fsdata(mp, 0); 429 error = xfs_sync_fsdata(mp, 0);
@@ -341,7 +442,7 @@ xfs_quiesce_fs(
341 int count = 0, pincount; 442 int count = 0, pincount;
342 443
343 xfs_flush_buftarg(mp->m_ddev_targp, 0); 444 xfs_flush_buftarg(mp->m_ddev_targp, 0);
344 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 445 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
345 446
346 /* 447 /*
347 * This loop must run at least twice. The first instance of the loop 448 * This loop must run at least twice. The first instance of the loop
@@ -350,7 +451,7 @@ xfs_quiesce_fs(
350 * logged before we can write the unmount record. 451 * logged before we can write the unmount record.
351 */ 452 */
352 do { 453 do {
353 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); 454 xfs_sync_attr(mp, SYNC_WAIT);
354 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 455 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
355 if (!pincount) { 456 if (!pincount) {
356 delay(50); 457 delay(50);
@@ -433,8 +534,8 @@ xfs_flush_inodes_work(
433 void *arg) 534 void *arg)
434{ 535{
435 struct inode *inode = arg; 536 struct inode *inode = arg;
436 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); 537 xfs_sync_data(mp, SYNC_TRYLOCK);
437 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); 538 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
438 iput(inode); 539 iput(inode);
439} 540}
440 541
@@ -465,10 +566,10 @@ xfs_sync_worker(
465 566
466 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 567 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
467 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 568 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
468 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 569 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
469 /* dgc: errors ignored here */ 570 /* dgc: errors ignored here */
470 error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); 571 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
471 error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); 572 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
472 if (xfs_log_need_covered(mp)) 573 if (xfs_log_need_covered(mp))
473 error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); 574 error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
474 } 575 }
@@ -569,7 +670,7 @@ xfs_reclaim_inode(
569 xfs_ifunlock(ip); 670 xfs_ifunlock(ip);
570 xfs_iunlock(ip, XFS_ILOCK_EXCL); 671 xfs_iunlock(ip, XFS_ILOCK_EXCL);
571 } 672 }
572 return 1; 673 return -EAGAIN;
573 } 674 }
574 __xfs_iflags_set(ip, XFS_IRECLAIM); 675 __xfs_iflags_set(ip, XFS_IRECLAIM);
575 spin_unlock(&ip->i_flags_lock); 676 spin_unlock(&ip->i_flags_lock);
@@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag(
654 xfs_put_perag(mp, pag); 755 xfs_put_perag(mp, pag);
655} 756}
656 757
657 758STATIC int
658STATIC void 759xfs_reclaim_inode_now(
659xfs_reclaim_inodes_ag( 760 struct xfs_inode *ip,
660 xfs_mount_t *mp, 761 struct xfs_perag *pag,
661 int ag, 762 int flags)
662 int noblock,
663 int mode)
664{ 763{
665 xfs_inode_t *ip = NULL; 764 /* ignore if already under reclaim */
666 xfs_perag_t *pag = &mp->m_perag[ag]; 765 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
667 int nr_found;
668 uint32_t first_index;
669 int skipped;
670
671restart:
672 first_index = 0;
673 skipped = 0;
674 do {
675 /*
676 * use a gang lookup to find the next inode in the tree
677 * as the tree is sparse and a gang lookup walks to find
678 * the number of objects requested.
679 */
680 read_lock(&pag->pag_ici_lock);
681 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
682 (void**)&ip, first_index, 1,
683 XFS_ICI_RECLAIM_TAG);
684
685 if (!nr_found) {
686 read_unlock(&pag->pag_ici_lock);
687 break;
688 }
689
690 /*
691 * Update the index for the next lookup. Catch overflows
692 * into the next AG range which can occur if we have inodes
693 * in the last block of the AG and we are currently
694 * pointing to the last inode.
695 */
696 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
697 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
698 read_unlock(&pag->pag_ici_lock);
699 break;
700 }
701
702 /* ignore if already under reclaim */
703 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
704 read_unlock(&pag->pag_ici_lock);
705 continue;
706 }
707
708 if (noblock) {
709 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
710 read_unlock(&pag->pag_ici_lock);
711 continue;
712 }
713 if (xfs_ipincount(ip) ||
714 !xfs_iflock_nowait(ip)) {
715 xfs_iunlock(ip, XFS_ILOCK_EXCL);
716 read_unlock(&pag->pag_ici_lock);
717 continue;
718 }
719 }
720 read_unlock(&pag->pag_ici_lock); 766 read_unlock(&pag->pag_ici_lock);
721 767 return 0;
722 /*
723 * hmmm - this is an inode already in reclaim. Do
724 * we even bother catching it here?
725 */
726 if (xfs_reclaim_inode(ip, noblock, mode))
727 skipped++;
728 } while (nr_found);
729
730 if (skipped) {
731 delay(1);
732 goto restart;
733 } 768 }
734 return; 769 read_unlock(&pag->pag_ici_lock);
735 770
771 return xfs_reclaim_inode(ip, 0, flags);
736} 772}
737 773
738int 774int
739xfs_reclaim_inodes( 775xfs_reclaim_inodes(
740 xfs_mount_t *mp, 776 xfs_mount_t *mp,
741 int noblock,
742 int mode) 777 int mode)
743{ 778{
744 int i; 779 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
745 780 XFS_ICI_RECLAIM_TAG);
746 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
747 if (!mp->m_perag[i].pag_ici_init)
748 continue;
749 xfs_reclaim_inodes_ag(mp, i, noblock, mode);
750 }
751 return 0;
752} 781}
753
754
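The hunks above replace the hand-rolled per-AG radix-tree walks (xfs_sync_inodes_ag and xfs_reclaim_inodes_ag) with one generic iterator. A minimal sketch of the resulting callback pattern (the xfs_example_execute name is hypothetical; everything else is taken from this patch):

	STATIC int
	xfs_example_execute(
		struct xfs_inode	*ip,
		struct xfs_perag	*pag,
		int			flags)
	{
		int	error;

		/* drops pag->pag_ici_lock; takes a reference or fails */
		error = xfs_sync_inode_valid(ip, pag);
		if (error)
			return error;

		/* ... per-inode work; return EAGAIN to have the walk retry ... */

		IRELE(ip);
		return 0;
	}

	/* visit every inode in every initialised AG */
	error = xfs_inode_ag_iterator(mp, xfs_example_execute, flags,
					XFS_ICI_NO_TAG);

The iterator now centralises the gang lookup, the next-index overflow check at the AG boundary, and the skipped/EAGAIN retry loop that each walker previously open-coded.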
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf6dfbd..2a10301c99c7 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,17 +29,14 @@ typedef struct xfs_sync_work {
29 struct completion *w_completion; 29 struct completion *w_completion;
30} xfs_sync_work_t; 30} xfs_sync_work_t;
31 31
32#define SYNC_ATTR 0x0001 /* sync attributes */ 32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_DELWRI 0x0002 /* look at delayed writes */ 33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
35#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
36#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
37#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
38 34
39int xfs_syncd_init(struct xfs_mount *mp); 35int xfs_syncd_init(struct xfs_mount *mp);
40void xfs_syncd_stop(struct xfs_mount *mp); 36void xfs_syncd_stop(struct xfs_mount *mp);
41 37
42int xfs_sync_inodes(struct xfs_mount *mp, int flags); 38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags);
43int xfs_sync_fsdata(struct xfs_mount *mp, int flags); 40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
44 41
45int xfs_quiesce_data(struct xfs_mount *mp); 42int xfs_quiesce_data(struct xfs_mount *mp);
@@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
48void xfs_flush_inodes(struct xfs_inode *ip); 45void xfs_flush_inodes(struct xfs_inode *ip);
49 46
50int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); 47int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
51int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
52 49
53void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
54void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); 51void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
55void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
56 struct xfs_inode *ip); 53 struct xfs_inode *ip);
54
55int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
56int xfs_inode_ag_iterator(struct xfs_mount *mp,
57 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
58 int flags, int tag);
59
57#endif 60#endif
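The four retired flags collapse onto the two survivors plus dedicated entry points. Mapping the call sites converted elsewhere in this patch (a summary of the hunks above, not a new API):

	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH)          -> xfs_sync_data(mp, 0)
	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT) -> xfs_sync_data(mp, SYNC_WAIT)
	xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT)               -> xfs_sync_attr(mp, SYNC_WAIT)
	SYNC_BDFLUSH (caller must not block)                   -> SYNC_TRYLOCK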
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621fde6ed..497c7fb75cc1 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
29#include <linux/xattr.h> 29#include <linux/xattr.h>
30 30
31 31
32/*
33 * ACL handling. Should eventually be moved into xfs_acl.c
34 */
35
36static int
37xfs_decode_acl(const char *name)
38{
39 if (strcmp(name, "posix_acl_access") == 0)
40 return _ACL_TYPE_ACCESS;
41 else if (strcmp(name, "posix_acl_default") == 0)
42 return _ACL_TYPE_DEFAULT;
43 return -EINVAL;
44}
45
46/*
47 * Get system extended attributes which at the moment only
48 * includes Posix ACLs.
49 */
50static int
51xfs_xattr_system_get(struct inode *inode, const char *name,
52 void *buffer, size_t size)
53{
54 int acl;
55
56 acl = xfs_decode_acl(name);
57 if (acl < 0)
58 return acl;
59
60 return xfs_acl_vget(inode, buffer, size, acl);
61}
62
63static int
64xfs_xattr_system_set(struct inode *inode, const char *name,
65 const void *value, size_t size, int flags)
66{
67 int acl;
68
69 acl = xfs_decode_acl(name);
70 if (acl < 0)
71 return acl;
72 if (flags & XATTR_CREATE)
73 return -EINVAL;
74
75 if (!value)
76 return xfs_acl_vremove(inode, acl);
77
78 return xfs_acl_vset(inode, (void *)value, size, acl);
79}
80
81static struct xattr_handler xfs_xattr_system_handler = {
82 .prefix = XATTR_SYSTEM_PREFIX,
83 .get = xfs_xattr_system_get,
84 .set = xfs_xattr_system_set,
85};
86
87
88/*
89 * Real xattr handling. The only difference between the namespaces is
90 * a flag passed to the low-level attr code.
91 */
92
93static int 32static int
94__xfs_xattr_get(struct inode *inode, const char *name, 33__xfs_xattr_get(struct inode *inode, const char *name,
95 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
199 &xfs_xattr_user_handler, 138 &xfs_xattr_user_handler,
200 &xfs_xattr_trusted_handler, 139 &xfs_xattr_trusted_handler,
201 &xfs_xattr_security_handler, 140 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL
202 &xfs_xattr_system_handler, 142 &xfs_xattr_system_handler,
143#endif
203 NULL 144 NULL
204}; 145};
205 146
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
310 /* 251 /*
311 * Then add the two synthetic ACL attributes. 252 * Then add the two synthetic ACL attributes.
312 */ 253 */
313 if (xfs_acl_vhasacl_access(inode)) { 254 if (posix_acl_access_exists(inode)) {
314 error = list_one_attr(POSIX_ACL_XATTR_ACCESS, 255 error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
315 strlen(POSIX_ACL_XATTR_ACCESS) + 1, 256 strlen(POSIX_ACL_XATTR_ACCESS) + 1,
316 data, size, &context.count); 257 data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
318 return error; 259 return error;
319 } 260 }
320 261
321 if (xfs_acl_vhasacl_default(inode)) { 262 if (posix_acl_default_exists(inode)) {
322 error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, 263 error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
323 strlen(POSIX_ACL_XATTR_DEFAULT) + 1, 264 strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
324 data, size, &context.count); 265 data, size, &context.count);
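Note that xfs_vn_listxattr() still calls posix_acl_access_exists() and posix_acl_default_exists() unconditionally, so the new xfs_acl.h (added elsewhere in this series, not shown in this section) presumably stubs them out when ACLs are disabled. A hedged sketch of what that header side would look like:

	#ifdef CONFIG_XFS_POSIX_ACL
	extern int posix_acl_access_exists(struct inode *inode);
	extern int posix_acl_default_exists(struct inode *inode);
	#else
	# define posix_acl_access_exists(inode)		0
	# define posix_acl_default_exists(inode)	0
	#endif

Only the system-namespace handler itself needs the explicit #ifdef in the handler table above.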
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc63423..2f3f2229eaaf 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
@@ -1194,7 +1193,9 @@ void
1194xfs_qm_dqrele( 1193xfs_qm_dqrele(
1195 xfs_dquot_t *dqp) 1194 xfs_dquot_t *dqp)
1196{ 1195{
1197 ASSERT(dqp); 1196 if (!dqp)
1197 return;
1198
1198 xfs_dqtrace_entry(dqp, "DQRELE"); 1199 xfs_dqtrace_entry(dqp, "DQRELE");
1199 1200
1200 xfs_dqlock(dqp); 1201 xfs_dqlock(dqp);
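Folding the NULL check into xfs_qm_dqrele() itself means callers can release dquots unconditionally on error paths; the xfs_qm_dqrele_null() wrapper deleted from xfs_qm_bhv.c further down existed only to supply this check. A typical caller after this change (sketch):

	/* udqp and/or gdqp may legitimately be NULL here */
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);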
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402ddb4c..6533ead9b889 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
181extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, 181extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
182 xfs_dqid_t, uint, uint, xfs_dquot_t **); 182 xfs_dqid_t, uint, uint, xfs_dquot_t **);
183extern void xfs_qm_dqput(xfs_dquot_t *); 183extern void xfs_qm_dqput(xfs_dquot_t *);
184extern void xfs_qm_dqrele(xfs_dquot_t *);
185extern void xfs_dqlock(xfs_dquot_t *); 184extern void xfs_dqlock(xfs_dquot_t *);
186extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); 185extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
187extern void xfs_dqunlock(xfs_dquot_t *); 186extern void xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a7c4f5..d0d4a9a0bbd7 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_trans_priv.h" 47#include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b6695049e00..45b1bfef7388 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_bmap.h" 43#include "xfs_bmap.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
@@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref(
287 * Just destroy the quotainfo structure. 286 * Just destroy the quotainfo structure.
288 */ 287 */
289void 288void
290xfs_qm_unmount_quotadestroy( 289xfs_qm_unmount(
291 xfs_mount_t *mp) 290 struct xfs_mount *mp)
292{ 291{
293 if (mp->m_quotainfo) 292 if (mp->m_quotainfo) {
293 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
294 xfs_qm_destroy_quotainfo(mp); 294 xfs_qm_destroy_quotainfo(mp);
295 }
295} 296}
296 297
297 298
@@ -385,8 +386,13 @@ xfs_qm_mount_quotas(
385 if (error) { 386 if (error) {
386 xfs_fs_cmn_err(CE_WARN, mp, 387 xfs_fs_cmn_err(CE_WARN, mp,
387 "Failed to initialize disk quotas."); 388 "Failed to initialize disk quotas.");
389 return;
388 } 390 }
389 return; 391
392#ifdef QUOTADEBUG
393 if (XFS_IS_QUOTA_ON(mp))
394 xfs_qm_internalqcheck(mp);
395#endif
390} 396}
391 397
392/* 398/*
@@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint(
774 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON 780 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
775 * into account. 781 * into account.
776 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. 782 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
777 * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
778 * Inode may get unlocked and relocked in here, and the caller must deal with 783 * Inode may get unlocked and relocked in here, and the caller must deal with
779 * the consequences. 784 * the consequences.
780 */ 785 */
781int 786int
782xfs_qm_dqattach( 787xfs_qm_dqattach_locked(
783 xfs_inode_t *ip, 788 xfs_inode_t *ip,
784 uint flags) 789 uint flags)
785{ 790{
@@ -787,17 +792,14 @@ xfs_qm_dqattach(
787 uint nquotas = 0; 792 uint nquotas = 0;
788 int error = 0; 793 int error = 0;
789 794
790 if ((! XFS_IS_QUOTA_ON(mp)) || 795 if (!XFS_IS_QUOTA_RUNNING(mp) ||
791 (! XFS_NOT_DQATTACHED(mp, ip)) || 796 !XFS_IS_QUOTA_ON(mp) ||
792 (ip->i_ino == mp->m_sb.sb_uquotino) || 797 !XFS_NOT_DQATTACHED(mp, ip) ||
793 (ip->i_ino == mp->m_sb.sb_gquotino)) 798 ip->i_ino == mp->m_sb.sb_uquotino ||
799 ip->i_ino == mp->m_sb.sb_gquotino)
794 return 0; 800 return 0;
795 801
796 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 802 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
797 xfs_isilocked(ip, XFS_ILOCK_EXCL));
798
799 if (! (flags & XFS_QMOPT_ILOCKED))
800 xfs_ilock(ip, XFS_ILOCK_EXCL);
801 803
802 if (XFS_IS_UQUOTA_ON(mp)) { 804 if (XFS_IS_UQUOTA_ON(mp)) {
803 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, 805 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +851,7 @@ xfs_qm_dqattach(
849 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); 851 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
850 } 852 }
851 853
852 done: 854 done:
853
854#ifdef QUOTADEBUG 855#ifdef QUOTADEBUG
855 if (! error) { 856 if (! error) {
856 if (XFS_IS_UQUOTA_ON(mp)) 857 if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +859,22 @@ xfs_qm_dqattach(
858 if (XFS_IS_OQUOTA_ON(mp)) 859 if (XFS_IS_OQUOTA_ON(mp))
859 ASSERT(ip->i_gdquot); 860 ASSERT(ip->i_gdquot);
860 } 861 }
862 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
861#endif 863#endif
864 return error;
865}
862 866
863 if (! (flags & XFS_QMOPT_ILOCKED)) 867int
864 xfs_iunlock(ip, XFS_ILOCK_EXCL); 868xfs_qm_dqattach(
869 struct xfs_inode *ip,
870 uint flags)
871{
872 int error;
873
874 xfs_ilock(ip, XFS_ILOCK_EXCL);
875 error = xfs_qm_dqattach_locked(ip, flags);
876 xfs_iunlock(ip, XFS_ILOCK_EXCL);
865 877
866#ifdef QUOTADEBUG
867 else
868 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
869#endif
870 return error; 878 return error;
871} 879}
872 880
@@ -896,11 +904,6 @@ xfs_qm_dqdetach(
896 } 904 }
897} 905}
898 906
899/*
900 * This is called to sync quotas. We can be told to use non-blocking
901 * semantics by either the SYNC_BDFLUSH flag or the absence of the
902 * SYNC_WAIT flag.
903 */
904int 907int
905xfs_qm_sync( 908xfs_qm_sync(
906 xfs_mount_t *mp, 909 xfs_mount_t *mp,
@@ -909,17 +912,13 @@ xfs_qm_sync(
909 int recl, restarts; 912 int recl, restarts;
910 xfs_dquot_t *dqp; 913 xfs_dquot_t *dqp;
911 uint flush_flags; 914 uint flush_flags;
912 boolean_t nowait;
913 int error; 915 int error;
914 916
915 if (! XFS_IS_QUOTA_ON(mp)) 917 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
916 return 0; 918 return 0;
917 919
920 flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
918 restarts = 0; 921 restarts = 0;
919 /*
920 * We won't block unless we are asked to.
921 */
922 nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
923 922
924 again: 923 again:
925 xfs_qm_mplist_lock(mp); 924 xfs_qm_mplist_lock(mp);
@@ -939,18 +938,10 @@ xfs_qm_sync(
939 * don't 'seem' to be dirty. ie. don't acquire dqlock. 938 * don't 'seem' to be dirty. ie. don't acquire dqlock.
940 * This is very similar to what xfs_sync does with inodes. 939 * This is very similar to what xfs_sync does with inodes.
941 */ 940 */
942 if (flags & SYNC_BDFLUSH) { 941 if (flags & SYNC_TRYLOCK) {
943 if (! XFS_DQ_IS_DIRTY(dqp)) 942 if (!XFS_DQ_IS_DIRTY(dqp))
944 continue; 943 continue;
945 } 944 if (!xfs_qm_dqlock_nowait(dqp))
946
947 if (nowait) {
948 /*
949 * Try to acquire the dquot lock. We are NOT out of
950 * lock order, but we just don't want to wait for this
951 * lock, unless somebody wanted us to.
952 */
953 if (! xfs_qm_dqlock_nowait(dqp))
954 continue; 945 continue;
955 } else { 946 } else {
956 xfs_dqlock(dqp); 947 xfs_dqlock(dqp);
@@ -967,7 +958,7 @@ xfs_qm_sync(
967 /* XXX a sentinel would be better */ 958 /* XXX a sentinel would be better */
968 recl = XFS_QI_MPLRECLAIMS(mp); 959 recl = XFS_QI_MPLRECLAIMS(mp);
969 if (!xfs_dqflock_nowait(dqp)) { 960 if (!xfs_dqflock_nowait(dqp)) {
970 if (nowait) { 961 if (flags & SYNC_TRYLOCK) {
971 xfs_dqunlock(dqp); 962 xfs_dqunlock(dqp);
972 continue; 963 continue;
973 } 964 }
@@ -985,7 +976,6 @@ xfs_qm_sync(
985 * Let go of the mplist lock. We don't want to hold it 976 * Let go of the mplist lock. We don't want to hold it
986 * across a disk write 977 * across a disk write
987 */ 978 */
988 flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
989 xfs_qm_mplist_unlock(mp); 979 xfs_qm_mplist_unlock(mp);
990 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); 980 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
991 error = xfs_qm_dqflush(dqp, flush_flags); 981 error = xfs_qm_dqflush(dqp, flush_flags);
@@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes(
2319 */ 2309 */
2320int 2310int
2321xfs_qm_vop_dqalloc( 2311xfs_qm_vop_dqalloc(
2322 xfs_mount_t *mp, 2312 struct xfs_inode *ip,
2323 xfs_inode_t *ip, 2313 uid_t uid,
2324 uid_t uid, 2314 gid_t gid,
2325 gid_t gid, 2315 prid_t prid,
2326 prid_t prid, 2316 uint flags,
2327 uint flags, 2317 struct xfs_dquot **O_udqpp,
2328 xfs_dquot_t **O_udqpp, 2318 struct xfs_dquot **O_gdqpp)
2329 xfs_dquot_t **O_gdqpp)
2330{ 2319{
2331 int error; 2320 struct xfs_mount *mp = ip->i_mount;
2332 xfs_dquot_t *uq, *gq; 2321 struct xfs_dquot *uq, *gq;
2333 uint lockflags; 2322 int error;
2323 uint lockflags;
2334 2324
2335 if (!XFS_IS_QUOTA_ON(mp)) 2325 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2336 return 0; 2326 return 0;
2337 2327
2338 lockflags = XFS_ILOCK_EXCL; 2328 lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc(
2346 * if necessary. The dquot(s) will not be locked. 2336 * if necessary. The dquot(s) will not be locked.
2347 */ 2337 */
2348 if (XFS_NOT_DQATTACHED(mp, ip)) { 2338 if (XFS_NOT_DQATTACHED(mp, ip)) {
2349 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | 2339 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
2350 XFS_QMOPT_ILOCKED))) { 2340 if (error) {
2351 xfs_iunlock(ip, lockflags); 2341 xfs_iunlock(ip, lockflags);
2352 return error; 2342 return error;
2353 } 2343 }
@@ -2469,6 +2459,7 @@ xfs_qm_vop_chown(
2469 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 2459 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2470 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 2460 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2471 2461
2462
2472 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2463 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2473 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 2464 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2474 2465
@@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve(
2508 xfs_dquot_t *gdqp, 2499 xfs_dquot_t *gdqp,
2509 uint flags) 2500 uint flags)
2510{ 2501{
2511 int error; 2502 xfs_mount_t *mp = ip->i_mount;
2512 xfs_mount_t *mp;
2513 uint delblks, blkflags, prjflags = 0; 2503 uint delblks, blkflags, prjflags = 0;
2514 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2504 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2505 int error;
2506
2515 2507
2516 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2508 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2517 mp = ip->i_mount;
2518 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2509 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2519 2510
2520 delblks = ip->i_delayed_blks; 2511 delblks = ip->i_delayed_blks;
@@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve(
2582 2573
2583int 2574int
2584xfs_qm_vop_rename_dqattach( 2575xfs_qm_vop_rename_dqattach(
2585 xfs_inode_t **i_tab) 2576 struct xfs_inode **i_tab)
2586{ 2577{
2587 xfs_inode_t *ip; 2578 struct xfs_mount *mp = i_tab[0]->i_mount;
2588 int i; 2579 int i;
2589 int error;
2590 2580
2591 ip = i_tab[0]; 2581 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2592
2593 if (! XFS_IS_QUOTA_ON(ip->i_mount))
2594 return 0; 2582 return 0;
2595 2583
2596 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { 2584 for (i = 0; (i < 4 && i_tab[i]); i++) {
2597 error = xfs_qm_dqattach(ip, 0); 2585 struct xfs_inode *ip = i_tab[i];
2598 if (error) 2586 int error;
2599 return error; 2587
2600 }
2601 for (i = 1; (i < 4 && i_tab[i]); i++) {
2602 /* 2588 /*
2603 * Watch out for duplicate entries in the table. 2589 * Watch out for duplicate entries in the table.
2604 */ 2590 */
2605 if ((ip = i_tab[i]) != i_tab[i-1]) { 2591 if (i == 0 || ip != i_tab[i-1]) {
2606 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { 2592 if (XFS_NOT_DQATTACHED(mp, ip)) {
2607 error = xfs_qm_dqattach(ip, 0); 2593 error = xfs_qm_dqattach(ip, 0);
2608 if (error) 2594 if (error)
2609 return error; 2595 return error;
@@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach(
2614} 2600}
2615 2601
2616void 2602void
2617xfs_qm_vop_dqattach_and_dqmod_newinode( 2603xfs_qm_vop_create_dqattach(
2618 xfs_trans_t *tp, 2604 struct xfs_trans *tp,
2619 xfs_inode_t *ip, 2605 struct xfs_inode *ip,
2620 xfs_dquot_t *udqp, 2606 struct xfs_dquot *udqp,
2621 xfs_dquot_t *gdqp) 2607 struct xfs_dquot *gdqp)
2622{ 2608{
2623 if (!XFS_IS_QUOTA_ON(tp->t_mountp)) 2609 struct xfs_mount *mp = tp->t_mountp;
2610
2611 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2624 return; 2612 return;
2625 2613
2626 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2614 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2627 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); 2615 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2628 2616
2629 if (udqp) { 2617 if (udqp) {
2630 xfs_dqlock(udqp); 2618 xfs_dqlock(udqp);
@@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2632 xfs_dqunlock(udqp); 2620 xfs_dqunlock(udqp);
2633 ASSERT(ip->i_udquot == NULL); 2621 ASSERT(ip->i_udquot == NULL);
2634 ip->i_udquot = udqp; 2622 ip->i_udquot = udqp;
2635 ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); 2623 ASSERT(XFS_IS_UQUOTA_ON(mp));
2636 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2624 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2637 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2625 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2638 } 2626 }
@@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2642 xfs_dqunlock(gdqp); 2630 xfs_dqunlock(gdqp);
2643 ASSERT(ip->i_gdquot == NULL); 2631 ASSERT(ip->i_gdquot == NULL);
2644 ip->i_gdquot = gdqp; 2632 ip->i_gdquot = gdqp;
2645 ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); 2633 ASSERT(XFS_IS_OQUOTA_ON(mp));
2646 ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? 2634 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2647 ip->i_d.di_gid : ip->i_d.di_projid) == 2635 ip->i_d.di_gid : ip->i_d.di_projid) ==
2648 be32_to_cpu(gdqp->q_core.d_id)); 2636 be32_to_cpu(gdqp->q_core.d_id));
2649 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2637 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
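A recurring pattern in these hunks: with the per-mount quota ops vector going away (see the xfs_qm_bhv.c hunks below), the quota entry points are reached by direct call even on mounts that never enabled quota, so almost all of them now open with the same cheap guard:

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return 0;	/* quota accounting not active on this mount */

Checking both conditions inline keeps the direct calls safe where the ops indirection previously insulated non-quota mounts.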
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954cae1b..495564b8af38 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
127} xfs_quotainfo_t; 127} xfs_quotainfo_t;
128 128
129 129
130extern xfs_dqtrxops_t xfs_trans_dquot_ops;
131
132extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 130extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
133extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 131extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
134 xfs_dquot_t *, xfs_dquot_t *, long, long, uint); 132 xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
159#define XFS_QM_RTBWARNLIMIT 5 157#define XFS_QM_RTBWARNLIMIT 5
160 158
161extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 159extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
162extern void xfs_qm_mount_quotas(xfs_mount_t *);
163extern int xfs_qm_quotacheck(xfs_mount_t *); 160extern int xfs_qm_quotacheck(xfs_mount_t *);
164extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
165extern void xfs_qm_unmount_quotas(xfs_mount_t *);
166extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 161extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
167extern int xfs_qm_sync(xfs_mount_t *, int);
168 162
169/* dquot stuff */ 163/* dquot stuff */
170extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); 164extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
171extern int xfs_qm_dqattach(xfs_inode_t *, uint);
172extern void xfs_qm_dqdetach(xfs_inode_t *);
173extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 165extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
174extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 166extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
175 167
@@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
183extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 175extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
184extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 176extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
185 177
186/* vop stuff */
187extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
188 uid_t, gid_t, prid_t, uint,
189 xfs_dquot_t **, xfs_dquot_t **);
190extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
191 xfs_trans_t *, xfs_inode_t *,
192 xfs_dquot_t *, xfs_dquot_t *);
193extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
194extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
195 xfs_dquot_t **, xfs_dquot_t *);
196extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
197 xfs_dquot_t *, xfs_dquot_t *, uint);
198
199/* list stuff */ 178/* list stuff */
200extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); 179extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
201extern void xfs_qm_freelist_unlink(xfs_dquot_t *); 180extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c689a4b..a5346630dfae 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
42#include "xfs_rtalloc.h" 42#include "xfs_rtalloc.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_qm.h" 47#include "xfs_qm.h"
@@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot(
84 * return a statvfs of the project, not the entire filesystem. 83 * return a statvfs of the project, not the entire filesystem.
85 * This makes such trees appear as if they are filesystems in themselves. 84 * This makes such trees appear as if they are filesystems in themselves.
86 */ 85 */
87STATIC void 86void
88xfs_qm_statvfs( 87xfs_qm_statvfs(
89 xfs_inode_t *ip, 88 xfs_inode_t *ip,
90 struct kstatfs *statp) 89 struct kstatfs *statp)
@@ -92,20 +91,13 @@ xfs_qm_statvfs(
92 xfs_mount_t *mp = ip->i_mount; 91 xfs_mount_t *mp = ip->i_mount;
93 xfs_dquot_t *dqp; 92 xfs_dquot_t *dqp;
94 93
95 if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
96 !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
97 (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
98 return;
99
100 if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { 94 if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
101 xfs_disk_dquot_t *dp = &dqp->q_core; 95 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
102
103 xfs_fill_statvfs_from_dquot(statp, dp);
104 xfs_qm_dqput(dqp); 96 xfs_qm_dqput(dqp);
105 } 97 }
106} 98}
107 99
108STATIC int 100int
109xfs_qm_newmount( 101xfs_qm_newmount(
110 xfs_mount_t *mp, 102 xfs_mount_t *mp,
111 uint *needquotamount, 103 uint *needquotamount,
@@ -114,9 +106,6 @@ xfs_qm_newmount(
114 uint quotaondisk; 106 uint quotaondisk;
115 uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; 107 uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
116 108
117 *quotaflags = 0;
118 *needquotamount = B_FALSE;
119
120 quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && 109 quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
121 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); 110 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
122 111
@@ -179,66 +168,6 @@ xfs_qm_newmount(
179 return 0; 168 return 0;
180} 169}
181 170
182STATIC int
183xfs_qm_endmount(
184 xfs_mount_t *mp,
185 uint needquotamount,
186 uint quotaflags)
187{
188 if (needquotamount) {
189 ASSERT(mp->m_qflags == 0);
190 mp->m_qflags = quotaflags;
191 xfs_qm_mount_quotas(mp);
192 }
193
194#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
195 if (! (XFS_IS_QUOTA_ON(mp)))
196 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
197 else
198 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
199#endif
200
201#ifdef QUOTADEBUG
202 if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
203 cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
204#endif
205
206 return 0;
207}
208
209STATIC void
210xfs_qm_dqrele_null(
211 xfs_dquot_t *dq)
212{
213 /*
214 * Called from XFS, where we always check first for a NULL dquot.
215 */
216 if (!dq)
217 return;
218 xfs_qm_dqrele(dq);
219}
220
221
222struct xfs_qmops xfs_qmcore_xfs = {
223 .xfs_qminit = xfs_qm_newmount,
224 .xfs_qmdone = xfs_qm_unmount_quotadestroy,
225 .xfs_qmmount = xfs_qm_endmount,
226 .xfs_qmunmount = xfs_qm_unmount_quotas,
227 .xfs_dqrele = xfs_qm_dqrele_null,
228 .xfs_dqattach = xfs_qm_dqattach,
229 .xfs_dqdetach = xfs_qm_dqdetach,
230 .xfs_dqpurgeall = xfs_qm_dqpurge_all,
231 .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
232 .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
233 .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
234 .xfs_dqvopchown = xfs_qm_vop_chown,
235 .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
236 .xfs_dqstatvfs = xfs_qm_statvfs,
237 .xfs_dqsync = xfs_qm_sync,
238 .xfs_dqtrxops = &xfs_trans_dquot_ops,
239};
240EXPORT_SYMBOL(xfs_qmcore_xfs);
241
242void __init 171void __init
243xfs_qm_init(void) 172xfs_qm_init(void)
244{ 173{
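This removes the xfs_qmops function vector outright: instead of dispatching through xfs_qmcore_xfs, callers invoke the quota functions directly, as the xfs_sync.c hunks earlier in this diff already showed. Before/after at a typical call site:

	/* before: indirect through the per-mount ops vector */
	error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);

	/* after: direct call (note SYNC_BDFLUSH also became SYNC_TRYLOCK) */
	error = xfs_qm_sync(mp, SYNC_TRYLOCK);

For CONFIG_XFS_QUOTA=n builds these calls presumably resolve to stubs in xfs_quota.h, which this section does not show.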
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f545cf5..21b08c0396a1 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
42#include "xfs_rtalloc.h" 42#include "xfs_rtalloc.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_qm.h" 47#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6506ce..4e4276b956e8 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_error.h" 46#include "xfs_error.h"
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_acl.h"
49#include "xfs_attr.h" 48#include "xfs_attr.h"
50#include "xfs_buf_item.h" 49#include "xfs_buf_item.h"
51#include "xfs_utils.h" 50#include "xfs_utils.h"
@@ -847,105 +846,55 @@ xfs_qm_export_flags(
847} 846}
848 847
849 848
850/* 849STATIC int
851 * Release all the dquots on the inodes in an AG. 850xfs_dqrele_inode(
852 */ 851 struct xfs_inode *ip,
853STATIC void 852 struct xfs_perag *pag,
854xfs_qm_dqrele_inodes_ag( 853 int flags)
855 xfs_mount_t *mp,
856 int ag,
857 uint flags)
858{ 854{
859 xfs_inode_t *ip = NULL; 855 int error;
860 xfs_perag_t *pag = &mp->m_perag[ag];
861 int first_index = 0;
862 int nr_found;
863
864 do {
865 /*
866 * use a gang lookup to find the next inode in the tree
867 * as the tree is sparse and a gang lookup walks to find
868 * the number of objects requested.
869 */
870 read_lock(&pag->pag_ici_lock);
871 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
872 (void**)&ip, first_index, 1);
873
874 if (!nr_found) {
875 read_unlock(&pag->pag_ici_lock);
876 break;
877 }
878
879 /*
880 * Update the index for the next lookup. Catch overflows
881 * into the next AG range which can occur if we have inodes
882 * in the last block of the AG and we are currently
883 * pointing to the last inode.
884 */
885 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
886 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
887 read_unlock(&pag->pag_ici_lock);
888 break;
889 }
890
891 /* skip quota inodes */
892 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
893 ASSERT(ip->i_udquot == NULL);
894 ASSERT(ip->i_gdquot == NULL);
895 read_unlock(&pag->pag_ici_lock);
896 continue;
897 }
898 856
899 /* 857 /* skip quota inodes */
900 * If we can't get a reference on the inode, it must be 858 if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
901 * in reclaim. Leave it for the reclaim code to flush. 859 ASSERT(ip->i_udquot == NULL);
902 */ 860 ASSERT(ip->i_gdquot == NULL);
903 if (!igrab(VFS_I(ip))) {
904 read_unlock(&pag->pag_ici_lock);
905 continue;
906 }
907 read_unlock(&pag->pag_ici_lock); 861 read_unlock(&pag->pag_ici_lock);
862 return 0;
863 }
908 864
909 /* avoid new inodes though we shouldn't find any here */ 865 error = xfs_sync_inode_valid(ip, pag);
910 if (xfs_iflags_test(ip, XFS_INEW)) { 866 if (error)
911 IRELE(ip); 867 return error;
912 continue;
913 }
914 868
915 xfs_ilock(ip, XFS_ILOCK_EXCL); 869 xfs_ilock(ip, XFS_ILOCK_EXCL);
916 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 870 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
917 xfs_qm_dqrele(ip->i_udquot); 871 xfs_qm_dqrele(ip->i_udquot);
918 ip->i_udquot = NULL; 872 ip->i_udquot = NULL;
919 } 873 }
920 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && 874 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
921 ip->i_gdquot) { 875 xfs_qm_dqrele(ip->i_gdquot);
922 xfs_qm_dqrele(ip->i_gdquot); 876 ip->i_gdquot = NULL;
923 ip->i_gdquot = NULL; 877 }
924 } 878 xfs_iput(ip, XFS_ILOCK_EXCL);
925 xfs_iput(ip, XFS_ILOCK_EXCL); 879 IRELE(ip);
926 880
927 } while (nr_found); 881 return 0;
928} 882}
929 883
884
930/* 885/*
931 * Go thru all the inodes in the file system, releasing their dquots. 886 * Go thru all the inodes in the file system, releasing their dquots.
887 *
932 * Note that the mount structure gets modified to indicate that quotas are off 888 * Note that the mount structure gets modified to indicate that quotas are off
933 * AFTER this, in the case of quotaoff. This also gets called from 889 * AFTER this, in the case of quotaoff.
934 * xfs_rootumount.
935 */ 890 */
936void 891void
937xfs_qm_dqrele_all_inodes( 892xfs_qm_dqrele_all_inodes(
938 struct xfs_mount *mp, 893 struct xfs_mount *mp,
939 uint flags) 894 uint flags)
940{ 895{
941 int i;
942
943 ASSERT(mp->m_quotainfo); 896 ASSERT(mp->m_quotainfo);
944 for (i = 0; i < mp->m_sb.sb_agcount; i++) { 897 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
945 if (!mp->m_perag[i].pag_ici_init)
946 continue;
947 xfs_qm_dqrele_inodes_ag(mp, i, flags);
948 }
949} 898}
950 899
951/*------------------------------------------------------------------------*/ 900/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173bcf96d..97ac9640be98 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
42#include "xfs_rtalloc.h" 42#include "xfs_rtalloc.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_trans_priv.h" 47#include "xfs_trans_priv.h"
@@ -111,7 +110,7 @@ xfs_trans_log_dquot(
111 * Carry forward whatever is left of the quota blk reservation to 110 * Carry forward whatever is left of the quota blk reservation to
112 * the spanky new transaction 111 * the spanky new transaction
113 */ 112 */
114STATIC void 113void
115xfs_trans_dup_dqinfo( 114xfs_trans_dup_dqinfo(
116 xfs_trans_t *otp, 115 xfs_trans_t *otp,
117 xfs_trans_t *ntp) 116 xfs_trans_t *ntp)
@@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo(
167/* 166/*
168 * Wrap around mod_dquot to account for both user and group quotas. 167 * Wrap around mod_dquot to account for both user and group quotas.
169 */ 168 */
170STATIC void 169void
171xfs_trans_mod_dquot_byino( 170xfs_trans_mod_dquot_byino(
172 xfs_trans_t *tp, 171 xfs_trans_t *tp,
173 xfs_inode_t *ip, 172 xfs_inode_t *ip,
174 uint field, 173 uint field,
175 long delta) 174 long delta)
176{ 175{
177 xfs_mount_t *mp; 176 xfs_mount_t *mp = tp->t_mountp;
178
179 ASSERT(tp);
180 mp = tp->t_mountp;
181 177
182 if (!XFS_IS_QUOTA_ON(mp) || 178 if (!XFS_IS_QUOTA_RUNNING(mp) ||
179 !XFS_IS_QUOTA_ON(mp) ||
183 ip->i_ino == mp->m_sb.sb_uquotino || 180 ip->i_ino == mp->m_sb.sb_uquotino ||
184 ip->i_ino == mp->m_sb.sb_gquotino) 181 ip->i_ino == mp->m_sb.sb_gquotino)
185 return; 182 return;
@@ -229,6 +226,7 @@ xfs_trans_mod_dquot(
229 xfs_dqtrx_t *qtrx; 226 xfs_dqtrx_t *qtrx;
230 227
231 ASSERT(tp); 228 ASSERT(tp);
229 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
232 qtrx = NULL; 230 qtrx = NULL;
233 231
234 if (tp->t_dqinfo == NULL) 232 if (tp->t_dqinfo == NULL)
@@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin(
346 * Unreserve just the reservations done by this transaction. 344 * Unreserve just the reservations done by this transaction.
347 * dquot is still left locked at exit. 345 * dquot is still left locked at exit.
348 */ 346 */
349STATIC void 347void
350xfs_trans_apply_dquot_deltas( 348xfs_trans_apply_dquot_deltas(
351 xfs_trans_t *tp) 349 xfs_trans_t *tp)
352{ 350{
@@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas(
357 long totalbdelta; 355 long totalbdelta;
358 long totalrtbdelta; 356 long totalrtbdelta;
359 357
360 if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) 358 if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
361 return; 359 return;
362 360
363 ASSERT(tp->t_dqinfo); 361 ASSERT(tp->t_dqinfo);
@@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas(
531 * we simply throw those away, since that's the expected behavior 529 * we simply throw those away, since that's the expected behavior
532 * when a transaction is curtailed without a commit. 530 * when a transaction is curtailed without a commit.
533 */ 531 */
534STATIC void 532void
535xfs_trans_unreserve_and_mod_dquots( 533xfs_trans_unreserve_and_mod_dquots(
536 xfs_trans_t *tp) 534 xfs_trans_t *tp)
537{ 535{
@@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots(
768{ 766{
769 int resvd = 0, error; 767 int resvd = 0, error;
770 768
771 if (!XFS_IS_QUOTA_ON(mp)) 769 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
772 return 0; 770 return 0;
773 771
774 if (tp && tp->t_dqinfo == NULL) 772 if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots(
811 * This doesn't change the actual usage, just the reservation. 809 * This doesn't change the actual usage, just the reservation.
812 * The inode sent in is locked. 810 * The inode sent in is locked.
813 */ 811 */
814STATIC int 812int
815xfs_trans_reserve_quota_nblks( 813xfs_trans_reserve_quota_nblks(
816 xfs_trans_t *tp, 814 struct xfs_trans *tp,
817 xfs_mount_t *mp, 815 struct xfs_inode *ip,
818 xfs_inode_t *ip, 816 long nblks,
819 long nblks, 817 long ninos,
820 long ninos, 818 uint flags)
821 uint flags)
822{ 819{
823 int error; 820 struct xfs_mount *mp = ip->i_mount;
824 821
825 if (!XFS_IS_QUOTA_ON(mp)) 822 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
826 return 0; 823 return 0;
827 if (XFS_IS_PQUOTA_ON(mp)) 824 if (XFS_IS_PQUOTA_ON(mp))
828 flags |= XFS_QMOPT_ENOSPC; 825 flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks(
831 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 828 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
832 829
833 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 830 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
834 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
835 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 831 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
836 XFS_TRANS_DQ_RES_RTBLKS || 832 XFS_TRANS_DQ_RES_RTBLKS ||
837 (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 833 (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks(
840 /* 836 /*
841 * Reserve nblks against these dquots, with trans as the mediator. 837 * Reserve nblks against these dquots, with trans as the mediator.
842 */ 838 */
843 error = xfs_trans_reserve_quota_bydquots(tp, mp, 839 return xfs_trans_reserve_quota_bydquots(tp, mp,
844 ip->i_udquot, ip->i_gdquot, 840 ip->i_udquot, ip->i_gdquot,
845 nblks, ninos, 841 nblks, ninos, flags);
846 flags);
847 return error;
848} 842}
849 843
850/* 844/*
@@ -895,25 +889,15 @@ STATIC void
895xfs_trans_alloc_dqinfo( 889xfs_trans_alloc_dqinfo(
896 xfs_trans_t *tp) 890 xfs_trans_t *tp)
897{ 891{
898 (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); 892 tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
899} 893}
900 894
901STATIC void 895void
902xfs_trans_free_dqinfo( 896xfs_trans_free_dqinfo(
903 xfs_trans_t *tp) 897 xfs_trans_t *tp)
904{ 898{
905 if (!tp->t_dqinfo) 899 if (!tp->t_dqinfo)
906 return; 900 return;
907 kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); 901 kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
908 (tp)->t_dqinfo = NULL; 902 tp->t_dqinfo = NULL;
909} 903}
910
911xfs_dqtrxops_t xfs_trans_dquot_ops = {
912 .qo_dup_dqinfo = xfs_trans_dup_dqinfo,
913 .qo_free_dqinfo = xfs_trans_free_dqinfo,
914 .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino,
915 .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas,
916 .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks,
917 .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots,
918 .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots,
919};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73999a4..000000000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
1/*
2 * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_inum.h"
23#include "xfs_ag.h"
24#include "xfs_dir2.h"
25#include "xfs_bmap_btree.h"
26#include "xfs_alloc_btree.h"
27#include "xfs_ialloc_btree.h"
28#include "xfs_dir2_sf.h"
29#include "xfs_attr_sf.h"
30#include "xfs_dinode.h"
31#include "xfs_inode.h"
32#include "xfs_btree.h"
33#include "xfs_acl.h"
34#include "xfs_attr.h"
35#include "xfs_vnodeops.h"
36
37#include <linux/capability.h>
38#include <linux/posix_acl_xattr.h>
39
40STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
42STATIC void xfs_acl_get_endian(xfs_acl_t *);
43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
44STATIC int xfs_acl_invalid(xfs_acl_t *);
45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
46STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
47STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
48STATIC int xfs_acl_allow_set(struct inode *, int);
49
50kmem_zone_t *xfs_acl_zone;
51
52
53/*
54 * Test for existence of access ACL attribute as efficiently as possible.
55 */
56int
57xfs_acl_vhasacl_access(
58 struct inode *vp)
59{
60 int error;
61
62 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
63 return (error == 0);
64}
65
66/*
67 * Test for existence of default ACL attribute as efficiently as possible.
68 */
69int
70xfs_acl_vhasacl_default(
71 struct inode *vp)
72{
73 int error;
74
75 if (!S_ISDIR(vp->i_mode))
76 return 0;
77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
78 return (error == 0);
79}
80
81/*
82 * Convert from extended attribute representation to in-memory for XFS.
83 */
84STATIC int
85posix_acl_xattr_to_xfs(
86 posix_acl_xattr_header *src,
87 size_t size,
88 xfs_acl_t *dest)
89{
90 posix_acl_xattr_entry *src_entry;
91 xfs_acl_entry_t *dest_entry;
92 int n;
93
94 if (!src || !dest)
95 return EINVAL;
96
97 if (size < sizeof(posix_acl_xattr_header))
98 return EINVAL;
99
100 if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
101 return EOPNOTSUPP;
102
103 memset(dest, 0, sizeof(xfs_acl_t));
104 dest->acl_cnt = posix_acl_xattr_count(size);
105 if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
106 return EINVAL;
107
108 /*
109 * acl_set_file(3) may request that we set default ACLs with
110 * zero length -- defend (gracefully) against that here.
111 */
112 if (!dest->acl_cnt)
113 return 0;
114
115 src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
116 dest_entry = &dest->acl_entry[0];
117
118 for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
119 dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
120 if (_ACL_PERM_INVALID(dest_entry->ae_perm))
121 return EINVAL;
122 dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag);
123 switch(dest_entry->ae_tag) {
124 case ACL_USER:
125 case ACL_GROUP:
126 dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
127 break;
128 case ACL_USER_OBJ:
129 case ACL_GROUP_OBJ:
130 case ACL_MASK:
131 case ACL_OTHER:
132 dest_entry->ae_id = ACL_UNDEFINED_ID;
133 break;
134 default:
135 return EINVAL;
136 }
137 }
138 if (xfs_acl_invalid(dest))
139 return EINVAL;
140
141 return 0;
142}
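For reference on the sizing logic above: the xattr blob is a fixed 4-byte header followed by fixed-size entries, so posix_acl_xattr_count() can recover the entry count from the byte size alone. A worked example, assuming the usual generic layout:

	/*
	 * header: __le32 a_version                          =  4 bytes
	 * entry:  __le16 e_tag, __le16 e_perm, __le32 e_id  =  8 bytes
	 *
	 * a 4-entry ACL: 4 + 4 * 8 = 36 bytes
	 * posix_acl_xattr_count(36) = (36 - 4) / 8 = 4
	 */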
143
144/*
145 * Comparison function called from xfs_sort().
146 * Primary key is ae_tag, secondary key is ae_id.
147 */
148STATIC int
149xfs_acl_entry_compare(
150 const void *va,
151 const void *vb)
152{
153 xfs_acl_entry_t *a = (xfs_acl_entry_t *)va,
154 *b = (xfs_acl_entry_t *)vb;
155
156 if (a->ae_tag == b->ae_tag)
157 return (a->ae_id - b->ae_id);
158 return (a->ae_tag - b->ae_tag);
159}
160
161/*
162 * Convert from in-memory XFS to extended attribute representation.
163 */
164STATIC int
165posix_acl_xfs_to_xattr(
166 xfs_acl_t *src,
167 posix_acl_xattr_header *dest,
168 size_t size)
169{
170 int n;
171 size_t new_size = posix_acl_xattr_size(src->acl_cnt);
172 posix_acl_xattr_entry *dest_entry;
173 xfs_acl_entry_t *src_entry;
174
175 if (size < new_size)
176 return -ERANGE;
177
178 /* Need to sort src XFS ACL by <ae_tag,ae_id> */
179 xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
180 xfs_acl_entry_compare);
181
182 dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
183 dest_entry = &dest->a_entries[0];
184 src_entry = &src->acl_entry[0];
185 for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
186 dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
187 if (_ACL_PERM_INVALID(src_entry->ae_perm))
188 return -EINVAL;
189 dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag);
190 switch (src_entry->ae_tag) {
191 case ACL_USER:
192 case ACL_GROUP:
193 dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
194 break;
195 case ACL_USER_OBJ:
196 case ACL_GROUP_OBJ:
197 case ACL_MASK:
198 case ACL_OTHER:
199 dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
200 break;
201 default:
202 return -EINVAL;
203 }
204 }
205 return new_size;
206}
207
208int
209xfs_acl_vget(
210 struct inode *vp,
211 void *acl,
212 size_t size,
213 int kind)
214{
215 int error;
216 xfs_acl_t *xfs_acl = NULL;
217 posix_acl_xattr_header *ext_acl = acl;
218 int flags = 0;
219
220 if(size) {
221 if (!(_ACL_ALLOC(xfs_acl))) {
222 error = ENOMEM;
223 goto out;
224 }
225 memset(xfs_acl, 0, sizeof(xfs_acl_t));
226 } else
227 flags = ATTR_KERNOVAL;
228
229 xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
230 if (error)
231 goto out;
232
233 if (!size) {
234 error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
235 } else {
236 if (xfs_acl_invalid(xfs_acl)) {
237 error = EINVAL;
238 goto out;
239 }
240 if (kind == _ACL_TYPE_ACCESS)
241 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
242 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
243 }
244out:
245 if(xfs_acl)
246 _ACL_FREE(xfs_acl);
247 return -error;
248}
249
250int
251xfs_acl_vremove(
252 struct inode *vp,
253 int kind)
254{
255 int error;
256
257 error = xfs_acl_allow_set(vp, kind);
258 if (!error) {
259 error = xfs_attr_remove(XFS_I(vp),
260 kind == _ACL_TYPE_DEFAULT?
261 SGI_ACL_DEFAULT: SGI_ACL_FILE,
262 ATTR_ROOT);
263 if (error == ENOATTR)
264 error = 0; /* 'scool */
265 }
266 return -error;
267}
268
269int
270xfs_acl_vset(
271 struct inode *vp,
272 void *acl,
273 size_t size,
274 int kind)
275{
276 posix_acl_xattr_header *ext_acl = acl;
277 xfs_acl_t *xfs_acl;
278 int error;
279 int basicperms = 0; /* more than std unix perms? */
280
281 if (!acl)
282 return -EINVAL;
283
284 if (!(_ACL_ALLOC(xfs_acl)))
285 return -ENOMEM;
286
287 error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
288 if (error) {
289 _ACL_FREE(xfs_acl);
290 return -error;
291 }
292 if (!xfs_acl->acl_cnt) {
293 _ACL_FREE(xfs_acl);
294 return 0;
295 }
296
297 error = xfs_acl_allow_set(vp, kind);
298
299 /* Incoming ACL exists, set file mode based on its value */
300 if (!error && kind == _ACL_TYPE_ACCESS)
301 error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
302
303 if (error)
304 goto out;
305
306 /*
307 * If we have more than std unix permissions, set up the actual attr.
308 * Otherwise, delete any existing attr. This prevents us from
309 * having actual attrs for permissions that can be stored in the
310 * standard permission bits.
311 */
312 if (!basicperms) {
313 xfs_acl_set_attr(vp, xfs_acl, kind, &error);
314 } else {
315 error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
316 }
317
318out:
319 _ACL_FREE(xfs_acl);
320 return -error;
321}
322
323int
324xfs_acl_iaccess(
325 xfs_inode_t *ip,
326 mode_t mode,
327 cred_t *cr)
328{
329 xfs_acl_t *acl;
330 int rval;
331 struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
332
333 if (!(_ACL_ALLOC(acl)))
334 return -1;
335
336 /* If the file has no ACL return -1. */
337 rval = sizeof(xfs_acl_t);
338 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
339 _ACL_FREE(acl);
340 return -1;
341 }
342 xfs_acl_get_endian(acl);
343
344 /* If the file has an empty ACL return -1. */
345 if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
346 _ACL_FREE(acl);
347 return -1;
348 }
349
350 /* Synchronize ACL with mode bits */
351 xfs_acl_sync_mode(ip->i_d.di_mode, acl);
352
353 rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
354 _ACL_FREE(acl);
355 return rval;
356}
357
358STATIC int
359xfs_acl_allow_set(
360 struct inode *vp,
361 int kind)
362{
363 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
364 return EPERM;
365 if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
366 return ENOTDIR;
367 if (vp->i_sb->s_flags & MS_RDONLY)
368 return EROFS;
369 if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
370 return EPERM;
371 return 0;
372}
373
374/*
375 * Note: cr is only used here for the capability check if the ACL test fails.
376 * It is not used to find out the credential's uid or groups etc., as was
377 * done in IRIX. It is assumed that the uid and groups for the current
378 * thread are taken from "current" instead of the cr parameter.
379 */
380STATIC int
381xfs_acl_access(
382 uid_t fuid,
383 gid_t fgid,
384 xfs_acl_t *fap,
385 mode_t md,
386 cred_t *cr)
387{
388 xfs_acl_entry_t matched;
389 int i, allows;
390 int maskallows = -1; /* true, but not 1, either */
391 int seen_userobj = 0;
392
393 matched.ae_tag = 0; /* Invalid type */
394 matched.ae_perm = 0;
395
396 for (i = 0; i < fap->acl_cnt; i++) {
397 /*
398 * Break out if we've got a user_obj entry or
399 * a user entry and the mask (and have processed USER_OBJ)
400 */
401 if (matched.ae_tag == ACL_USER_OBJ)
402 break;
403 if (matched.ae_tag == ACL_USER) {
404 if (maskallows != -1 && seen_userobj)
405 break;
406 if (fap->acl_entry[i].ae_tag != ACL_MASK &&
407 fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
408 continue;
409 }
410 /* True if this entry allows the requested access */
411 allows = ((fap->acl_entry[i].ae_perm & md) == md);
412
413 switch (fap->acl_entry[i].ae_tag) {
414 case ACL_USER_OBJ:
415 seen_userobj = 1;
416 if (fuid != current_fsuid())
417 continue;
418 matched.ae_tag = ACL_USER_OBJ;
419 matched.ae_perm = allows;
420 break;
421 case ACL_USER:
422 if (fap->acl_entry[i].ae_id != current_fsuid())
423 continue;
424 matched.ae_tag = ACL_USER;
425 matched.ae_perm = allows;
426 break;
427 case ACL_GROUP_OBJ:
428 if ((matched.ae_tag == ACL_GROUP_OBJ ||
429 matched.ae_tag == ACL_GROUP) && !allows)
430 continue;
431 if (!in_group_p(fgid))
432 continue;
433 matched.ae_tag = ACL_GROUP_OBJ;
434 matched.ae_perm = allows;
435 break;
436 case ACL_GROUP:
437 if ((matched.ae_tag == ACL_GROUP_OBJ ||
438 matched.ae_tag == ACL_GROUP) && !allows)
439 continue;
440 if (!in_group_p(fap->acl_entry[i].ae_id))
441 continue;
442 matched.ae_tag = ACL_GROUP;
443 matched.ae_perm = allows;
444 break;
445 case ACL_MASK:
446 maskallows = allows;
447 break;
448 case ACL_OTHER:
449 if (matched.ae_tag != 0)
450 continue;
451 matched.ae_tag = ACL_OTHER;
452 matched.ae_perm = allows;
453 break;
454 }
455 }
456 /*
457 * First possibility is that no matched entry allows access.
458 * The capability to override DAC may exist, so check for it.
459 */
460 switch (matched.ae_tag) {
461 case ACL_OTHER:
462 case ACL_USER_OBJ:
463 if (matched.ae_perm)
464 return 0;
465 break;
466 case ACL_USER:
467 case ACL_GROUP_OBJ:
468 case ACL_GROUP:
469 if (maskallows && matched.ae_perm)
470 return 0;
471 break;
472 case 0:
473 break;
474 }
475
476 /* EACCES tells generic_permission to check for capability overrides */
477 return EACCES;
478}
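The matching loop above follows the POSIX.1e evaluation order: the owner entry first, then named users, then the owning and named groups filtered through ACL_MASK, and finally the other entry. A minimal userspace sketch of that ordering, using simplified stand-in types (a single gid stands in for in_group_p() and its supplementary groups; this is not the kernel code):

#include <stdio.h>

enum tag { T_USER_OBJ, T_USER, T_GROUP_OBJ, T_GROUP, T_MASK, T_OTHER };
struct ace { enum tag tag; unsigned id; unsigned perm; };

/* 0 = access granted, -1 = denied (the kernel returns EACCES instead) */
static int acl_check(unsigned fuid, unsigned fgid, unsigned uid, unsigned gid,
                     const struct ace *e, int n, unsigned want)
{
    unsigned mask = 07;             /* no ACL_MASK entry: nothing is masked */
    int i, in_group = 0, group_ok = 0;

    for (i = 0; i < n; i++)         /* the mask may appear after a match */
        if (e[i].tag == T_MASK)
            mask = e[i].perm;

    for (i = 0; i < n; i++) {
        const struct ace *a = &e[i];
        switch (a->tag) {
        case T_USER_OBJ:            /* owner: the mask does not apply */
            if (uid == fuid)
                return (a->perm & want) == want ? 0 : -1;
            break;
        case T_USER:                /* named user: masked */
            if (uid == a->id)
                return (a->perm & mask & want) == want ? 0 : -1;
            break;
        case T_GROUP_OBJ:           /* any matching group entry may grant */
        case T_GROUP:
            if ((a->tag == T_GROUP_OBJ && gid == fgid) ||
                (a->tag == T_GROUP && gid == a->id)) {
                in_group = 1;
                if ((a->perm & mask & want) == want)
                    group_ok = 1;
            }
            break;
        default:
            break;
        }
    }
    if (in_group)                   /* matched a group: other does not apply */
        return group_ok ? 0 : -1;
    for (i = 0; i < n; i++)         /* fall through to the other entry */
        if (e[i].tag == T_OTHER)
            return (e[i].perm & want) == want ? 0 : -1;
    return -1;
}

int main(void)
{
    struct ace acl[] = {
        { T_USER_OBJ, 0, 07 }, { T_GROUP_OBJ, 0, 05 },
        { T_MASK, 0, 04 }, { T_OTHER, 0, 0 },
    };
    /* group member asking for read: group r-x masked to r-- -> granted */
    printf("%d\n", acl_check(1000, 100, 1001, 100, acl, 4, 04));
    return 0;
}

Note that ACL_USER_OBJ is exempt from the mask, matching both the code above and POSIX.1e.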
479
480/*
481 * ACL validity checker.
482 * This ACL validation routine checks that each ACL entry read in makes sense.
483 */
484STATIC int
485xfs_acl_invalid(
486 xfs_acl_t *aclp)
487{
488 xfs_acl_entry_t *entry, *e;
489 int user = 0, group = 0, other = 0, mask = 0;
490 int mask_required = 0;
491 int i, j;
492
493 if (!aclp)
494 goto acl_invalid;
495
496 if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
497 goto acl_invalid;
498
499 for (i = 0; i < aclp->acl_cnt; i++) {
500 entry = &aclp->acl_entry[i];
501 switch (entry->ae_tag) {
502 case ACL_USER_OBJ:
503 if (user++)
504 goto acl_invalid;
505 break;
506 case ACL_GROUP_OBJ:
507 if (group++)
508 goto acl_invalid;
509 break;
510 case ACL_OTHER:
511 if (other++)
512 goto acl_invalid;
513 break;
514 case ACL_USER:
515 case ACL_GROUP:
516 for (j = i + 1; j < aclp->acl_cnt; j++) {
517 e = &aclp->acl_entry[j];
518 if (e->ae_id == entry->ae_id &&
519 e->ae_tag == entry->ae_tag)
520 goto acl_invalid;
521 }
522 mask_required++;
523 break;
524 case ACL_MASK:
525 if (mask++)
526 goto acl_invalid;
527 break;
528 default:
529 goto acl_invalid;
530 }
531 }
532 if (!user || !group || !other || (mask_required && !mask))
533 goto acl_invalid;
534 else
535 return 0;
536acl_invalid:
537 return EINVAL;
538}
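In prose, the rules enforced here are: exactly one each of u::, g::, and o::; at most one mask; no duplicate named user or group entries; and a mask is mandatory once any named entry appears. A compact userspace restatement (same simplified entry type as the sketch above, not the kernel's):

enum tag { T_USER_OBJ, T_USER, T_GROUP_OBJ, T_GROUP, T_MASK, T_OTHER };
struct ace { enum tag tag; unsigned id; unsigned perm; };

/* 0 = valid, -1 = invalid (the kernel returns EINVAL) */
static int acl_valid(const struct ace *e, int n)
{
    int user = 0, group = 0, other = 0, mask = 0, named = 0, i, j;

    for (i = 0; i < n; i++) {
        switch (e[i].tag) {
        case T_USER_OBJ:  user++;  break;
        case T_GROUP_OBJ: group++; break;
        case T_OTHER:     other++; break;
        case T_MASK:      mask++;  break;
        case T_USER:
        case T_GROUP:
            named++;
            for (j = i + 1; j < n; j++)     /* no duplicate named entries */
                if (e[j].tag == e[i].tag && e[j].id == e[i].id)
                    return -1;
            break;
        default:
            return -1;                      /* unknown tag */
        }
    }
    if (user != 1 || group != 1 || other != 1 || mask > 1)
        return -1;
    if (named && !mask)                     /* named entries require a mask */
        return -1;
    return 0;
}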
539
540/*
541 * Do ACL endian conversion.
542 */
543STATIC void
544xfs_acl_get_endian(
545 xfs_acl_t *aclp)
546{
547 xfs_acl_entry_t *ace, *end;
548
549 INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
550 end = &aclp->acl_entry[0]+aclp->acl_cnt;
551 for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
552 INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
553 INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
554 INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
555 }
556}
557
558/*
559 * Get the ACL from the EA and do endian conversion.
560 */
561STATIC void
562xfs_acl_get_attr(
563 struct inode *vp,
564 xfs_acl_t *aclp,
565 int kind,
566 int flags,
567 int *error)
568{
569 int len = sizeof(xfs_acl_t);
570
571 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
572 flags |= ATTR_ROOT;
573 *error = xfs_attr_get(XFS_I(vp),
574 kind == _ACL_TYPE_ACCESS ?
575 SGI_ACL_FILE : SGI_ACL_DEFAULT,
576 (char *)aclp, &len, flags);
577 if (*error || (flags & ATTR_KERNOVAL))
578 return;
579 xfs_acl_get_endian(aclp);
580}
581
582/*
583 * Set the EA with the ACL and do endian conversion.
584 */
585STATIC void
586xfs_acl_set_attr(
587 struct inode *vp,
588 xfs_acl_t *aclp,
589 int kind,
590 int *error)
591{
592 xfs_acl_entry_t *ace, *newace, *end;
593 xfs_acl_t *newacl;
594 int len;
595
596 if (!(_ACL_ALLOC(newacl))) {
597 *error = ENOMEM;
598 return;
599 }
600
601 len = sizeof(xfs_acl_t) -
602 (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
603 end = &aclp->acl_entry[0]+aclp->acl_cnt;
604 for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
605 ace < end;
606 ace++, newace++) {
607 INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
608 INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
609 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
610 }
611 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
612 *error = xfs_attr_set(XFS_I(vp),
613 kind == _ACL_TYPE_ACCESS ?
614 SGI_ACL_FILE: SGI_ACL_DEFAULT,
615 (char *)newacl, len, ATTR_ROOT);
616 _ACL_FREE(newacl);
617}
618
619int
620xfs_acl_vtoacl(
621 struct inode *vp,
622 xfs_acl_t *access_acl,
623 xfs_acl_t *default_acl)
624{
625 int error = 0;
626
627 if (access_acl) {
628 /*
629 * Get the Access ACL and the mode. If either cannot
630 * be obtained for some reason, invalidate the access ACL.
631 */
632 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
633 if (error)
634 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
635 else /* We have a good ACL and the file mode, synchronize. */
636 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
637 }
638
639 if (default_acl) {
640 xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
641 if (error)
642 default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
643 }
644 return error;
645}
646
647/*
648 * This function retrieves the parent directory's acl, processes it
649 * and lets the child inherit the acl(s) that it should.
650 */
651int
652xfs_acl_inherit(
653 struct inode *vp,
654 mode_t mode,
655 xfs_acl_t *pdaclp)
656{
657 xfs_acl_t *cacl;
658 int error = 0;
659 int basicperms = 0;
660
661 /*
662 * If the parent does not have a default ACL, or it's an
663 * invalid ACL, we're done.
664 */
665 if (!vp)
666 return 0;
667 if (!pdaclp || xfs_acl_invalid(pdaclp))
668 return 0;
669
670 /*
671 * Copy the default ACL of the containing directory to
672 * the access ACL of the new file and use the mode that
673 * was passed in to set up the correct initial values for
674 * the u::, g::[m::], and o:: entries. This is what makes
675 * umask() "work" with ACLs.
676 */
677
678 if (!(_ACL_ALLOC(cacl)))
679 return ENOMEM;
680
681 memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
682 xfs_acl_filter_mode(mode, cacl);
683 error = xfs_acl_setmode(vp, cacl, &basicperms);
684 if (error)
685 goto out_error;
686
687 /*
688 * Set the Default and Access ACL on the file. The mode is already
689 * set on the file, so we don't need to worry about that.
690 *
691 * If the new file is a directory, its default ACL is a copy of
692 * the containing directory's default ACL.
693 */
694 if (S_ISDIR(vp->i_mode))
695 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
696 if (!error && !basicperms)
697 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
698out_error:
699 _ACL_FREE(cacl);
700 return error;
701}
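The inheritance rule reduces to: the child's access ACL starts as a copy of the parent's default ACL with its special entries intersected against the creation mode, and a directory additionally receives the parent's default ACL verbatim. A sketch under those assumptions (toy types again, in-memory only, no attribute I/O):

#include <string.h>

enum tag { T_USER_OBJ, T_USER, T_GROUP_OBJ, T_GROUP, T_MASK, T_OTHER };
struct ace { enum tag tag; unsigned id; unsigned perm; };
struct acl { int cnt; struct ace e[25]; };

static void inherit(const struct acl *parent_default, unsigned mode,
                    int is_dir, struct acl *access, struct acl *dfl)
{
    int i, have_mask = 0;
    struct ace *gap = NULL;

    memcpy(access, parent_default, sizeof(*access));
    for (i = 0; i < access->cnt; i++) {     /* the filter_mode step */
        struct ace *a = &access->e[i];
        switch (a->tag) {
        case T_USER_OBJ:  a->perm &= (mode >> 6) & 07; break;
        case T_MASK:      a->perm &= (mode >> 3) & 07; have_mask = 1; break;
        case T_OTHER:     a->perm &=  mode       & 07; break;
        case T_GROUP_OBJ: gap = a; break;   /* g:: untouched if masked */
        default: break;
        }
    }
    if (gap && !have_mask)                  /* no mask: filter g:: itself */
        gap->perm &= (mode >> 3) & 07;

    if (is_dir)                     /* directories inherit the default too */
        memcpy(dfl, parent_default, sizeof(*dfl));
}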
702
703/*
704 * Set up the correct mode on the file based on the supplied ACL. This
705 * makes sure that the mode on the file reflects the state of the
706 * u::, g::[m::], and o:: entries in the ACL. Since the mode is where
707 * the ACL is going to get the permissions for these entries, we must
708 * synchronize the mode whenever we set the ACL on a file.
709 */
710STATIC int
711xfs_acl_setmode(
712 struct inode *vp,
713 xfs_acl_t *acl,
714 int *basicperms)
715{
716 struct iattr iattr;
717 xfs_acl_entry_t *ap;
718 xfs_acl_entry_t *gap = NULL;
719 int i, nomask = 1;
720
721 *basicperms = 1;
722
723 if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
724 return 0;
725
726 /*
727 * Copy the u::, g::, o::, and m:: bits from the ACL into the
728 * mode. The m:: bits take precedence over the g:: bits.
729 */
730 iattr.ia_valid = ATTR_MODE;
731 iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
732 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
733 ap = acl->acl_entry;
734 for (i = 0; i < acl->acl_cnt; ++i) {
735 switch (ap->ae_tag) {
736 case ACL_USER_OBJ:
737 iattr.ia_mode |= ap->ae_perm << 6;
738 break;
739 case ACL_GROUP_OBJ:
740 gap = ap;
741 break;
742 case ACL_MASK: /* more than just standard modes */
743 nomask = 0;
744 iattr.ia_mode |= ap->ae_perm << 3;
745 *basicperms = 0;
746 break;
747 case ACL_OTHER:
748 iattr.ia_mode |= ap->ae_perm;
749 break;
750 default: /* more than just standard modes */
751 *basicperms = 0;
752 break;
753 }
754 ap++;
755 }
756
757 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
758 if (gap && nomask)
759 iattr.ia_mode |= gap->ae_perm << 3;
760
761 return xfs_setattr(XFS_I(vp), &iattr, 0);
762}
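Reading the loop above as a formula: the user bits come from u::, the other bits from o::, and the group bits from m:: when a mask exists, otherwise from g::. As a self-contained helper over the same toy types (the basicperms bookkeeping is omitted for clarity):

enum tag { T_USER_OBJ, T_USER, T_GROUP_OBJ, T_GROUP, T_MASK, T_OTHER };
struct ace { enum tag tag; unsigned id; unsigned perm; };

/* derive the nine mode permission bits from the ACL special entries */
static unsigned mode_from_acl(const struct ace *e, int n)
{
    unsigned mode = 0, gbits = 0;
    int i, have_mask = 0;

    for (i = 0; i < n; i++) {
        switch (e[i].tag) {
        case T_USER_OBJ:  mode |= (e[i].perm & 07) << 6; break;
        case T_GROUP_OBJ: gbits =  e[i].perm & 07;       break;
        case T_MASK:      mode |= (e[i].perm & 07) << 3; have_mask = 1; break;
        case T_OTHER:     mode |=  e[i].perm & 07;       break;
        default: break;
        }
    }
    if (!have_mask)             /* no m:: entry: g:: supplies the group bits */
        mode |= gbits << 3;
    return mode;
}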
763
764/*
765 * The permissions for the special ACL entries (u::, g::[m::], o::) are
766 * actually stored in the file mode (if there is both a group and a mask,
767 * the group is stored in the ACL entry and the mask is stored on the file).
768 * This allows the mode to remain automatically in sync with the ACL without
769 * the need for a call-back to the ACL system at every point where the mode
770 * could change. This function takes the permissions from the specified mode
771 * and places it in the supplied ACL.
772 *
773 * This implementation draws its validity from the fact that, when the ACL
774 * was assigned, the mode was copied from the ACL.
775 * If the mode did not change, therefore, the mode remains exactly what was
776 * taken from the special ACL entries at assignment.
777 * If a subsequent chmod() was done, the POSIX spec says that the change in
778 * mode must cause an update to the ACL seen at user level and used for
779 * access checks. Before and after a mode change, therefore, the file mode
780 * most accurately reflects what the special ACL entries should permit/deny.
781 *
782 * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
783 * the existing mode bits will override whatever is in the
784 * ACL. Similarly, if there is a pre-existing ACL that was
785 * never in sync with its mode (owing to a bug in 6.5 and
786 * before), it will now magically (or mystically) be
787 * synchronized. This could cause slight astonishment, but
788 * it is better than inconsistent permissions.
789 *
790 * The supplied ACL is a template that may contain any combination
791 * of special entries. These are treated as placeholders when we fill
792 * out the ACL. This routine does not add or remove special entries, it
793 * simply unites each special entry with its associated set of permissions.
794 */
795STATIC void
796xfs_acl_sync_mode(
797 mode_t mode,
798 xfs_acl_t *acl)
799{
800 int i, nomask = 1;
801 xfs_acl_entry_t *ap;
802 xfs_acl_entry_t *gap = NULL;
803
804 /*
805 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
806 * be set instead of the GROUP entry, if there is a MASK.
807 */
808 for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
809 switch (ap->ae_tag) {
810 case ACL_USER_OBJ:
811 ap->ae_perm = (mode >> 6) & 0x7;
812 break;
813 case ACL_GROUP_OBJ:
814 gap = ap;
815 break;
816 case ACL_MASK:
817 nomask = 0;
818 ap->ae_perm = (mode >> 3) & 0x7;
819 break;
820 case ACL_OTHER:
821 ap->ae_perm = mode & 0x7;
822 break;
823 default:
824 break;
825 }
826 }
827 /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
828 if (gap && nomask)
829 gap->ae_perm = (mode >> 3) & 0x7;
830}
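This is the inverse of xfs_acl_setmode(): the mode bits are pushed back into the special entries, with m:: shadowing g:: when a mask is present. A terse sketch (toy types repeated so the fragment stands alone):

enum tag { T_USER_OBJ, T_USER, T_GROUP_OBJ, T_GROUP, T_MASK, T_OTHER };
struct ace { enum tag tag; unsigned id; unsigned perm; };

static void acl_sync_mode(unsigned mode, struct ace *e, int n)
{
    int i, have_mask = 0;
    struct ace *gap = NULL;

    for (i = 0; i < n; i++) {
        switch (e[i].tag) {
        case T_USER_OBJ:  e[i].perm = (mode >> 6) & 07; break;
        case T_GROUP_OBJ: gap = &e[i]; break;   /* decided after the loop */
        case T_MASK:      e[i].perm = (mode >> 3) & 07; have_mask = 1; break;
        case T_OTHER:     e[i].perm =  mode       & 07; break;
        default: break;
        }
    }
    if (gap && !have_mask)      /* no mask: the group bits land on g:: */
        gap->perm = (mode >> 3) & 07;
}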
831
832/*
833 * When inheriting an Access ACL from a directory Default ACL,
834 * the ACL bits are set to the intersection of the ACL default
835 * permission bits and the file permission bits in mode. If there
836 * are no permission bits on the file then we must not give them
837 * the ACL. This is what makes umask() work with ACLs.
838 */
839STATIC void
840xfs_acl_filter_mode(
841 mode_t mode,
842 xfs_acl_t *acl)
843{
844 int i, nomask = 1;
845 xfs_acl_entry_t *ap;
846 xfs_acl_entry_t *gap = NULL;
847
848 /*
849 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
850 * be merged with the GROUP entry, if there is a MASK.
851 */
852 for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
853 switch (ap->ae_tag) {
854 case ACL_USER_OBJ:
855 ap->ae_perm &= (mode >> 6) & 0x7;
856 break;
857 case ACL_GROUP_OBJ:
858 gap = ap;
859 break;
860 case ACL_MASK:
861 nomask = 0;
862 ap->ae_perm &= (mode >> 3) & 0x7;
863 break;
864 case ACL_OTHER:
865 ap->ae_perm &= mode & 0x7;
866 break;
867 default:
868 break;
869 }
870 }
871 /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
872 if (gap && nomask)
873 gap->ae_perm &= (mode >> 3) & 0x7;
874}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db4def4..63dc1f2efad5 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
18#ifndef __XFS_ACL_H__ 18#ifndef __XFS_ACL_H__
19#define __XFS_ACL_H__ 19#define __XFS_ACL_H__
20 20
21/* 21struct inode;
22 * Access Control Lists 22struct posix_acl;
23 */ 23struct xfs_inode;
24typedef __uint16_t xfs_acl_perm_t;
25typedef __int32_t xfs_acl_tag_t;
26typedef __int32_t xfs_acl_id_t;
27 24
28#define XFS_ACL_MAX_ENTRIES 25 25#define XFS_ACL_MAX_ENTRIES 25
29#define XFS_ACL_NOT_PRESENT (-1) 26#define XFS_ACL_NOT_PRESENT (-1)
30 27
31typedef struct xfs_acl_entry { 28/* On-disk XFS access control list structure */
32 xfs_acl_tag_t ae_tag; 29struct xfs_acl {
33 xfs_acl_id_t ae_id; 30 __be32 acl_cnt;
34 xfs_acl_perm_t ae_perm; 31 struct xfs_acl_entry {
35} xfs_acl_entry_t; 32 __be32 ae_tag;
36 33 __be32 ae_id;
37typedef struct xfs_acl { 34 __be16 ae_perm;
38 __int32_t acl_cnt; 35 } acl_entry[XFS_ACL_MAX_ENTRIES];
39 xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES]; 36};
40} xfs_acl_t;
41 37
42/* On-disk XFS extended attribute names */ 38/* On-disk XFS extended attribute names */
43#define SGI_ACL_FILE "SGI_ACL_FILE" 39#define SGI_ACL_FILE "SGI_ACL_FILE"
44#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" 40#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
45#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
46#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
47 43
48#define _ACL_TYPE_ACCESS 1
49#define _ACL_TYPE_DEFAULT 2
50
51#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode);
49extern void xfs_inode_init_acls(struct xfs_inode *ip);
50extern void xfs_inode_clear_acls(struct xfs_inode *ip);
51extern int posix_acl_access_exists(struct inode *inode);
52extern int posix_acl_default_exists(struct inode *inode);
52 53
53struct vattr; 54extern struct xattr_handler xfs_xattr_system_handler;
54struct xfs_inode;
55
56extern struct kmem_zone *xfs_acl_zone;
57#define xfs_acl_zone_init(zone, name) \
58 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
59#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
60
61extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
62extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
63extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
64extern int xfs_acl_vhasacl_access(struct inode *);
65extern int xfs_acl_vhasacl_default(struct inode *);
66extern int xfs_acl_vset(struct inode *, void *, size_t, int);
67extern int xfs_acl_vget(struct inode *, void *, size_t, int);
68extern int xfs_acl_vremove(struct inode *, int);
69
70#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
71
72#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
73#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0)
74#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0)
75#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access
76#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default
77
78#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
79#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
80
81#else 55#else
82#define xfs_acl_zone_init(zone,name) 56# define xfs_check_acl NULL
83#define xfs_acl_zone_destroy(zone) 57# define xfs_get_acl(inode, type) NULL
84#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) 58# define xfs_inherit_acl(inode, default_acl) 0
85#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP) 59# define xfs_acl_chmod(inode) 0
86#define xfs_acl_vremove(v,t) (-EOPNOTSUPP) 60# define xfs_inode_init_acls(ip)
87#define xfs_acl_vhasacl_access(v) (0) 61# define xfs_inode_clear_acls(ip)
88#define xfs_acl_vhasacl_default(v) (0) 62# define posix_acl_access_exists(inode) 0
89#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */ 63# define posix_acl_default_exists(inode) 0
90#define _ACL_FREE(a) ((void)0) 64#endif /* CONFIG_XFS_POSIX_ACL */
91#define _ACL_INHERIT(c,m,d) (0)
92#define _ACL_GET_ACCESS(pv,pa) (0)
93#define _ACL_GET_DEFAULT(pv,pd) (0)
94#define _ACL_ACCESS_EXISTS (NULL)
95#define _ACL_DEFAULT_EXISTS (NULL)
96#endif
97
98#endif /* __XFS_ACL_H__ */ 65#endif /* __XFS_ACL_H__ */
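The header now fixes the on-disk layout as big-endian (__be32/__be16 fields) instead of native-endian integers converted at runtime. A userspace sketch of decoding that layout with glibc's <endian.h> helpers; the types here mirror the hunk above for illustration only, and real code must also account for struct padding versus the packed on-disk format:

#include <endian.h>
#include <stdint.h>

#define XFS_ACL_MAX_ENTRIES 25

struct xfs_acl_entry_disk { uint32_t ae_tag, ae_id; uint16_t ae_perm; };
struct xfs_acl_disk {
    uint32_t acl_cnt;                       /* __be32 on disk */
    struct xfs_acl_entry_disk acl_entry[XFS_ACL_MAX_ENTRIES];
};

/* returns the entry count, or -1 if the attribute looks corrupt */
static int xfs_acl_disk_count(const struct xfs_acl_disk *a)
{
    uint32_t cnt = be32toh(a->acl_cnt);     /* convert once, at the edge */

    if (cnt > XFS_ACL_MAX_ENTRIES)
        return -1;
    return (int)cnt;
}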
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f713caa..f24b50b68d03 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
212/* 212/*
213 * tags for inode radix tree 213 * tags for inode radix tree
214 */ 214 */
215#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
216 in xfs_inode_ag_iterator */
215#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ 217#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
216 218
217#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 219#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70d1360..0902249354a0 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
73 73
74#endif /* __KERNEL__ */ 74#endif /* __KERNEL__ */
75 75
76/* do we need conversion? */
77#define ARCH_NOCONVERT 1
78#ifdef XFS_NATIVE_HOST
79# define ARCH_CONVERT ARCH_NOCONVERT
80#else
81# define ARCH_CONVERT 0
82#endif
83
84/* generic swapping macros */
85
86#ifndef HAVE_SWABMACROS
87#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
88#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
89#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
90#endif
91
92#define INT_SWAP(type, var) \
93 ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
94 ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
95 ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
96 (var))))
97
98/* 76/*
99 * get and set integers from potentially unaligned locations 77 * get and set integers from potentially unaligned locations
100 */ 78 */
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
107 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ 85 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
108 } 86 }
109 87
110/* does not return a value */
111#define INT_SET(reference,arch,valueref) \
112 (__builtin_constant_p(valueref) ? \
113 (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
114 (void)( \
115 ((reference) = (valueref)), \
116 ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
117 ) \
118 )
119
120/* 88/*
121 * In directories inode numbers are stored as unaligned arrays of unsigned 89 * In directories inode numbers are stored as unaligned arrays of unsigned
122 * 8bit integers on disk. 90 * 8bit integers on disk.
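With the byte order made explicit in the structure types, the ARCH_CONVERT/INT_SET machinery is dead code: the kernel's cpu_to_be32()/be32_to_cpu() family does the same job without the arch conditional. The userspace equivalent of the replacement pattern, for comparison:

#include <endian.h>
#include <stdint.h>

/* INT_SET(x, ARCH_CONVERT, v) on a 32-bit field becomes a plain
 * fixed-endian store; the matching load is a fixed-endian fetch */
static inline void store_be32(uint32_t *p, uint32_t cpu) { *p = htobe32(cpu); }
static inline uint32_t load_be32(const uint32_t *p)      { return be32toh(*p); }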
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde1654b430..db15feb906ff 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
45#include "xfs_error.h" 45#include "xfs_error.h"
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_acl.h"
49#include "xfs_rw.h" 48#include "xfs_rw.h"
50#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
51 50
@@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
249 /* 248 /*
250 * Attach the dquots to the inode. 249 * Attach the dquots to the inode.
251 */ 250 */
252 if ((error = XFS_QM_DQATTACH(mp, dp, 0))) 251 error = xfs_qm_dqattach(dp, 0);
253 return (error); 252 if (error)
253 return error;
254 254
255 /* 255 /*
256 * If the inode doesn't have an attribute fork, add one. 256 * If the inode doesn't have an attribute fork, add one.
@@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
311 } 311 }
312 xfs_ilock(dp, XFS_ILOCK_EXCL); 312 xfs_ilock(dp, XFS_ILOCK_EXCL);
313 313
314 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, 314 error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
315 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 315 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
316 XFS_QMOPT_RES_REGBLKS); 316 XFS_QMOPT_RES_REGBLKS);
317 if (error) { 317 if (error) {
@@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
501 /* 501 /*
502 * Attach the dquots to the inode. 502 * Attach the dquots to the inode.
503 */ 503 */
504 if ((error = XFS_QM_DQATTACH(mp, dp, 0))) 504 error = xfs_qm_dqattach(dp, 0);
505 return (error); 505 if (error)
506 return error;
506 507
507 /* 508 /*
508 * Start our first transaction of the day. 509 * Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c6005a487..7928b9983c1d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
2691 * Adjust the disk quota also. This was reserved 2691 * Adjust the disk quota also. This was reserved
2692 * earlier. 2692 * earlier.
2693 */ 2693 */
2694 XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, 2694 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
2695 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 2695 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
2696 XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 2696 XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
2697 } else { 2697 } else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
2995 * Adjust the disk quota also. This was reserved 2995 * Adjust the disk quota also. This was reserved
2996 * earlier. 2996 * earlier.
2997 */ 2997 */
2998 XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, 2998 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
2999 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : 2999 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3000 XFS_TRANS_DQ_BCOUNT, 3000 XFS_TRANS_DQ_BCOUNT,
3001 (long) args.len); 3001 (long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
3066 return error; 3066 return error;
3067 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); 3067 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
3068 ip->i_d.di_nblocks--; 3068 ip->i_d.di_nblocks--;
3069 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 3069 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
3070 xfs_trans_binval(tp, cbp); 3070 xfs_trans_binval(tp, cbp);
3071 if (cur->bc_bufs[0] == cbp) 3071 if (cur->bc_bufs[0] == cbp)
3072 cur->bc_bufs[0] = NULL; 3072 cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
3386 * Adjust quota data. 3386 * Adjust quota data.
3387 */ 3387 */
3388 if (qfield) 3388 if (qfield)
3389 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks); 3389 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
3390 3390
3391 /* 3391 /*
3392 * Account for change in delayed indirect blocks. 3392 * Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
3523 *firstblock = cur->bc_private.b.firstblock = args.fsbno; 3523 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
3524 cur->bc_private.b.allocated++; 3524 cur->bc_private.b.allocated++;
3525 ip->i_d.di_nblocks++; 3525 ip->i_d.di_nblocks++;
3526 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 3526 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
3527 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 3527 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
3528 /* 3528 /*
3529 * Fill in the child block. 3529 * Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
3690 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); 3690 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
3691 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3691 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3692 ip->i_d.di_nblocks = 1; 3692 ip->i_d.di_nblocks = 1;
3693 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, 3693 xfs_trans_mod_dquot_byino(tp, ip,
3694 XFS_TRANS_DQ_BCOUNT, 1L); 3694 XFS_TRANS_DQ_BCOUNT, 1L);
3695 flags |= xfs_ilog_fext(whichfork); 3695 flags |= xfs_ilog_fext(whichfork);
3696 } else { 3696 } else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
4048 XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) 4048 XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
4049 goto error0; 4049 goto error0;
4050 xfs_ilock(ip, XFS_ILOCK_EXCL); 4050 xfs_ilock(ip, XFS_ILOCK_EXCL);
4051 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ? 4051 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
4052 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 4052 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
4053 XFS_QMOPT_RES_REGBLKS); 4053 XFS_QMOPT_RES_REGBLKS);
4054 if (error) { 4054 if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
4983 * adjusted later. We return if we haven't 4983 * adjusted later. We return if we haven't
4984 * allocated blocks already inside this loop. 4984 * allocated blocks already inside this loop.
4985 */ 4985 */
4986 if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( 4986 error = xfs_trans_reserve_quota_nblks(
4987 mp, NULL, ip, (long)alen, 0, 4987 NULL, ip, (long)alen, 0,
4988 rt ? XFS_QMOPT_RES_RTBLKS : 4988 rt ? XFS_QMOPT_RES_RTBLKS :
4989 XFS_QMOPT_RES_REGBLKS))) { 4989 XFS_QMOPT_RES_REGBLKS);
4990 if (error) {
4990 if (n == 0) { 4991 if (n == 0) {
4991 *nmap = 0; 4992 *nmap = 0;
4992 ASSERT(cur == NULL); 4993 ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
5035 if (XFS_IS_QUOTA_ON(mp)) 5036 if (XFS_IS_QUOTA_ON(mp))
5036 /* unreserve the blocks now */ 5037 /* unreserve the blocks now */
5037 (void) 5038 (void)
5038 XFS_TRANS_UNRESERVE_QUOTA_NBLKS( 5039 xfs_trans_unreserve_quota_nblks(
5039 mp, NULL, ip, 5040 NULL, ip,
5040 (long)alen, 0, rt ? 5041 (long)alen, 0, rt ?
5041 XFS_QMOPT_RES_RTBLKS : 5042 XFS_QMOPT_RES_RTBLKS :
5042 XFS_QMOPT_RES_REGBLKS); 5043 XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
5691 do_div(rtexts, mp->m_sb.sb_rextsize); 5692 do_div(rtexts, mp->m_sb.sb_rextsize);
5692 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5693 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5693 (int64_t)rtexts, rsvd); 5694 (int64_t)rtexts, rsvd);
5694 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, 5695 (void)xfs_trans_reserve_quota_nblks(NULL,
5695 NULL, ip, -((long)del.br_blockcount), 0, 5696 ip, -((long)del.br_blockcount), 0,
5696 XFS_QMOPT_RES_RTBLKS); 5697 XFS_QMOPT_RES_RTBLKS);
5697 } else { 5698 } else {
5698 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5699 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
5699 (int64_t)del.br_blockcount, rsvd); 5700 (int64_t)del.br_blockcount, rsvd);
5700 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, 5701 (void)xfs_trans_reserve_quota_nblks(NULL,
5701 NULL, ip, -((long)del.br_blockcount), 0, 5702 ip, -((long)del.br_blockcount), 0,
5702 XFS_QMOPT_RES_REGBLKS); 5703 XFS_QMOPT_RES_REGBLKS);
5703 } 5704 }
5704 ip->i_delayed_blks -= del.br_blockcount; 5705 ip->i_delayed_blks -= del.br_blockcount;
@@ -6085,6 +6086,7 @@ xfs_getbmap(
6085 break; 6086 break;
6086 } 6087 }
6087 6088
6089 kmem_free(out);
6088 return error; 6090 return error;
6089} 6091}
6090 6092
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d352586f..5c1ade06578e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
590 cur->bc_private.b.allocated++; 590 cur->bc_private.b.allocated++;
591 cur->bc_private.b.ip->i_d.di_nblocks++; 591 cur->bc_private.b.ip->i_d.di_nblocks++;
592 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); 592 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
593 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, 593 xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
594 XFS_TRANS_DQ_BCOUNT, 1L); 594 XFS_TRANS_DQ_BCOUNT, 1L);
595 595
596 new->l = cpu_to_be64(args.fsbno); 596 new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
618 ip->i_d.di_nblocks--; 618 ip->i_d.di_nblocks--;
619 619
620 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 620 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
621 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 621 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
622 xfs_trans_binval(tp, bp); 622 xfs_trans_binval(tp, bp);
623 return 0; 623 return 0;
624} 624}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f304ef..edf8bdf4141f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
542 * waiting for the lock because someone else is waiting on the lock we 542 * waiting for the lock because someone else is waiting on the lock we
543 * hold and we cannot drop that as we are in a transaction here. 543 * hold and we cannot drop that as we are in a transaction here.
544 * 544 *
545 * Lucky for us, this inversion is rarely a problem because it's a 545 * Lucky for us, this inversion is not a problem because it's a
546 * directory inode that we are trying to lock here and that means the 546 * directory inode that we are trying to lock here.
547 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
548 * used. i.e. freeze, remount-ro, quotasync or unmount.
549 * 547 *
550 * So, if we can't get the iolock without sleeping then just give up 548 * So, if we can't get the iolock without sleeping then just give up
551 */ 549 */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fac8229..c4ea51b55dce 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
239 * Minimum and maximum sizes need for growth checks 239 * Minimum and maximum sizes need for growth checks
240 */ 240 */
241#define XFS_MIN_AG_BLOCKS 64 241#define XFS_MIN_AG_BLOCKS 64
242#define XFS_MIN_LOG_BLOCKS 512 242#define XFS_MIN_LOG_BLOCKS 512ULL
243#define XFS_MAX_LOG_BLOCKS (64 * 1024) 243#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
244#define XFS_MIN_LOG_BYTES (256 * 1024) 244#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
245#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) 245
246/* keep the maximum size under 2^31 by a small amount */
247#define XFS_MAX_LOG_BYTES \
248 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
246 249
247/* 250/*
248 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT 251 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
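The new limits are worth checking by hand: XFS_MAX_LOG_BYTES is 2 GiB minus XFS_MIN_LOG_BYTES (10 MiB), i.e. 2136997888 bytes, safely under 2^31 as the comment requires. A standalone check of that arithmetic:

#include <assert.h>

#define XFS_MIN_LOG_BYTES   (10 * 1024 * 1024ULL)
#define XFS_MAX_LOG_BYTES   ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)

int main(void)
{
    /* 2^31 - 10 MiB = 2147483648 - 10485760 */
    assert(XFS_MAX_LOG_BYTES == 2136997888ULL);
    assert(XFS_MAX_LOG_BYTES < (1ULL << 31));
    return 0;
}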
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81eedce6a..76c540f719e4 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h" 19#include "xfs_fs.h"
20#include "xfs_types.h" 20#include "xfs_types.h"
21#include "xfs_acl.h"
21#include "xfs_bit.h" 22#include "xfs_bit.h"
22#include "xfs_log.h" 23#include "xfs_log.h"
23#include "xfs_inum.h" 24#include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
82 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 83 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
83 ip->i_size = 0; 84 ip->i_size = 0;
84 ip->i_new_size = 0; 85 ip->i_new_size = 0;
86 xfs_inode_init_acls(ip);
85 87
86 /* 88 /*
87 * Initialize inode's trace buffers. 89 * Initialize inode's trace buffers.
@@ -500,10 +502,7 @@ xfs_ireclaim(
500 * ilock one but will still hold the iolock. 502 * ilock one but will still hold the iolock.
501 */ 503 */
502 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 504 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
503 /* 505 xfs_qm_dqdetach(ip);
504 * Release dquots (and their references) if any.
505 */
506 XFS_QM_DQDETACH(ip->i_mount, ip);
507 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 506 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
508 507
509 switch (ip->i_d.di_mode & S_IFMT) { 508 switch (ip->i_d.di_mode & S_IFMT) {
@@ -561,6 +560,7 @@ xfs_ireclaim(
561 ASSERT(atomic_read(&ip->i_pincount) == 0); 560 ASSERT(atomic_read(&ip->i_pincount) == 0);
562 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 561 ASSERT(!spin_is_locked(&ip->i_flags_lock));
563 ASSERT(completion_done(&ip->i_flush)); 562 ASSERT(completion_done(&ip->i_flush));
563 xfs_inode_clear_acls(ip);
564 kmem_zone_free(xfs_inode_zone, ip); 564 kmem_zone_free(xfs_inode_zone, ip);
565} 565}
566 566
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c8cbf2..1f22d65fed0a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_dir2_trace.h" 50#include "xfs_dir2_trace.h"
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_acl.h"
53#include "xfs_filestream.h" 52#include "xfs_filestream.h"
54#include "xfs_vnodeops.h" 53#include "xfs_vnodeops.h"
55 54
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1bc4b96..77016702938b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
18#ifndef __XFS_INODE_H__ 18#ifndef __XFS_INODE_H__
19#define __XFS_INODE_H__ 19#define __XFS_INODE_H__
20 20
21struct posix_acl;
21struct xfs_dinode; 22struct xfs_dinode;
22struct xfs_inode; 23struct xfs_inode;
23 24
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
272 /* VFS inode */ 273 /* VFS inode */
273 struct inode i_vnode; /* embedded VFS inode */ 274 struct inode i_vnode; /* embedded VFS inode */
274 275
276#ifdef CONFIG_XFS_POSIX_ACL
277 struct posix_acl *i_acl;
278 struct posix_acl *i_default_acl;
279#endif
280
275 /* Trace buffers per inode. */ 281 /* Trace buffers per inode. */
276#ifdef XFS_INODE_TRACE 282#ifdef XFS_INODE_TRACE
277 struct ktrace *i_trace; /* general inode trace */ 283 struct ktrace *i_trace; /* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7ec155..67ae5555a30a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
42#include "xfs_error.h" 42#include "xfs_error.h"
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h"
46#include "xfs_attr.h" 45#include "xfs_attr.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
@@ -385,7 +384,7 @@ xfs_iomap_write_direct(
385 * Make sure that the dquots are there. This doesn't hold 384 * Make sure that the dquots are there. This doesn't hold
386 * the ilock across a disk read. 385 * the ilock across a disk read.
387 */ 386 */
388 error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); 387 error = xfs_qm_dqattach_locked(ip, 0);
389 if (error) 388 if (error)
390 return XFS_ERROR(error); 389 return XFS_ERROR(error);
391 390
@@ -444,8 +443,7 @@ xfs_iomap_write_direct(
444 if (error) 443 if (error)
445 goto error_out; 444 goto error_out;
446 445
447 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 446 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
448 qblocks, 0, quota_flag);
449 if (error) 447 if (error)
450 goto error1; 448 goto error1;
451 449
@@ -495,7 +493,7 @@ xfs_iomap_write_direct(
495 493
496error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 494error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
497 xfs_bmap_cancel(&free_list); 495 xfs_bmap_cancel(&free_list);
498 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 496 xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
499 497
500error1: /* Just cancel transaction */ 498error1: /* Just cancel transaction */
501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 499 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +580,7 @@ xfs_iomap_write_delay(
582 * Make sure that the dquots are there. This doesn't hold 580 * Make sure that the dquots are there. This doesn't hold
583 * the ilock across a disk read. 581 * the ilock across a disk read.
584 */ 582 */
585 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); 583 error = xfs_qm_dqattach_locked(ip, 0);
586 if (error) 584 if (error)
587 return XFS_ERROR(error); 585 return XFS_ERROR(error);
588 586
@@ -684,7 +682,8 @@ xfs_iomap_write_allocate(
684 /* 682 /*
685 * Make sure that the dquots are there. 683 * Make sure that the dquots are there.
686 */ 684 */
687 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 685 error = xfs_qm_dqattach(ip, 0);
686 if (error)
688 return XFS_ERROR(error); 687 return XFS_ERROR(error);
689 688
690 offset_fsb = XFS_B_TO_FSBT(mp, offset); 689 offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba450116d4f..47da2fb45377 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
1975 error = 0; 1975 error = 0;
1976 if (buf_f->blf_flags & 1976 if (buf_f->blf_flags &
1977 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 1977 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
1978 if (item->ri_buf[i].i_addr == NULL) {
1979 cmn_err(CE_ALERT,
1980 "XFS: NULL dquot in %s.", __func__);
1981 goto next;
1982 }
1983 if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) {
1984 cmn_err(CE_ALERT,
1985 "XFS: dquot too small (%d) in %s.",
1986 item->ri_buf[i].i_len, __func__);
1987 goto next;
1988 }
1978 error = xfs_qm_dqcheck((xfs_disk_dquot_t *) 1989 error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
1979 item->ri_buf[i].i_addr, 1990 item->ri_buf[i].i_addr,
1980 -1, 0, XFS_QMOPT_DOWARN, 1991 -1, 0, XFS_QMOPT_DOWARN,
1981 "dquot_buf_recover"); 1992 "dquot_buf_recover");
1993 if (error)
1994 goto next;
1982 } 1995 }
1983 if (!error) 1996
1984 memcpy(xfs_buf_offset(bp, 1997 memcpy(xfs_buf_offset(bp,
1985 (uint)bit << XFS_BLI_SHIFT), /* dest */ 1998 (uint)bit << XFS_BLI_SHIFT), /* dest */
1986 item->ri_buf[i].i_addr, /* source */ 1999 item->ri_buf[i].i_addr, /* source */
1987 nbits<<XFS_BLI_SHIFT); /* length */ 2000 nbits<<XFS_BLI_SHIFT); /* length */
2001 next:
1988 i++; 2002 i++;
1989 bit += nbits; 2003 bit += nbits;
1990 } 2004 }
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
2615 return (0); 2629 return (0);
2616 2630
2617 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; 2631 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
2618 ASSERT(recddq); 2632
2633 if (item->ri_buf[1].i_addr == NULL) {
2634 cmn_err(CE_ALERT,
2635 "XFS: NULL dquot in %s.", __func__);
2636 return XFS_ERROR(EIO);
2637 }
2638 if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) {
2639 cmn_err(CE_ALERT,
2640 "XFS: dquot too small (%d) in %s.",
2641 item->ri_buf[1].i_len, __func__);
2642 return XFS_ERROR(EIO);
2643 }
2644
2619 /* 2645 /*
2620 * This type of quotas was turned off, so ignore this record. 2646 * This type of quotas was turned off, so ignore this record.
2621 */ 2647 */
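Both hunks apply the same hardening rule: a region coming out of log recovery is untrusted input, so check the pointer and the length against sizeof(xfs_dqblk_t) before reading through it. The pattern in isolation, with userspace stand-ins for the log item fields:

#include <stddef.h>
#include <stdio.h>

struct log_region { void *i_addr; int i_len; };

/* 0 = safe to use, -1 = reject (the kernel warns, then skips or EIOs) */
static int dquot_region_ok(const struct log_region *r, size_t need)
{
    if (r->i_addr == NULL) {
        fprintf(stderr, "NULL dquot region\n");
        return -1;
    }
    if (r->i_len < 0 || (size_t)r->i_len < need) {
        fprintf(stderr, "dquot region too small (%d)\n", r->i_len);
        return -1;
    }
    return 0;
}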
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a99725d0cc..5c6f092659c1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp)
960} 960}
961 961
962/* 962/*
963 * Clear the quotaflags in memory and in the superblock.
964 */
965int
966xfs_mount_reset_sbqflags(
967 struct xfs_mount *mp)
968{
969 int error;
970 struct xfs_trans *tp;
971
972 mp->m_qflags = 0;
973
974 /*
975 * It is OK to look at sb_qflags here in mount path,
976 * without m_sb_lock.
977 */
978 if (mp->m_sb.sb_qflags == 0)
979 return 0;
980 spin_lock(&mp->m_sb_lock);
981 mp->m_sb.sb_qflags = 0;
982 spin_unlock(&mp->m_sb_lock);
983
984 /*
985 * If the fs is readonly, let the incore superblock run
986 * with quotas off but don't flush the update out to disk
987 */
988 if (mp->m_flags & XFS_MOUNT_RDONLY)
989 return 0;
990
991#ifdef QUOTADEBUG
992 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
993#endif
994
995 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
996 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
997 XFS_DEFAULT_LOG_COUNT);
998 if (error) {
999 xfs_trans_cancel(tp, 0);
1000 xfs_fs_cmn_err(CE_ALERT, mp,
1001 "xfs_mount_reset_sbqflags: Superblock update failed!");
1002 return error;
1003 }
1004
1005 xfs_mod_sb(tp, XFS_SB_QFLAGS);
1006 return xfs_trans_commit(tp, 0);
1007}
1008
1009/*
963 * This function does the following on an initial mount of a file system: 1010 * This function does the following on an initial mount of a file system:
964 * - reads the superblock from disk and init the mount struct 1011 * - reads the superblock from disk and init the mount struct
965 * - if we're a 32-bit kernel, do a size check on the superblock 1012 * - if we're a 32-bit kernel, do a size check on the superblock
@@ -976,7 +1023,8 @@ xfs_mountfs(
976 xfs_sb_t *sbp = &(mp->m_sb); 1023 xfs_sb_t *sbp = &(mp->m_sb);
977 xfs_inode_t *rip; 1024 xfs_inode_t *rip;
978 __uint64_t resblks; 1025 __uint64_t resblks;
979 uint quotamount, quotaflags; 1026 uint quotamount = 0;
1027 uint quotaflags = 0;
980 int error = 0; 1028 int error = 0;
981 1029
982 xfs_mount_common(mp, sbp); 1030 xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
1210 /* 1258 /*
1211 * Initialise the XFS quota management subsystem for this mount 1259 * Initialise the XFS quota management subsystem for this mount
1212 */ 1260 */
1213 error = XFS_QM_INIT(mp, &quotamount, &quotaflags); 1261 if (XFS_IS_QUOTA_RUNNING(mp)) {
1214 if (error) 1262 error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
1215 goto out_rtunmount; 1263 if (error)
1264 goto out_rtunmount;
1265 } else {
1266 ASSERT(!XFS_IS_QUOTA_ON(mp));
1267
1268 /*
1269 * If a file system had quotas running earlier, but decided to
1270 * mount without -o uquota/pquota/gquota options, revoke the
1271 * quotachecked license.
1272 */
1273 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
1274 cmn_err(CE_NOTE,
1275 "XFS: resetting qflags for filesystem %s",
1276 mp->m_fsname);
1277
1278 error = xfs_mount_reset_sbqflags(mp);
1279 if (error)
1280 return error;
1281 }
1282 }
1216 1283
1217 /* 1284 /*
1218 * Finish recovering the file system. This part needed to be 1285 * Finish recovering the file system. This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
1228 /* 1295 /*
1229 * Complete the quota initialisation, post-log-replay component. 1296 * Complete the quota initialisation, post-log-replay component.
1230 */ 1297 */
1231 error = XFS_QM_MOUNT(mp, quotamount, quotaflags); 1298 if (quotamount) {
1232 if (error) 1299 ASSERT(mp->m_qflags == 0);
1233 goto out_rtunmount; 1300 mp->m_qflags = quotaflags;
1301
1302 xfs_qm_mount_quotas(mp);
1303 }
1304
1305#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1306 if (XFS_IS_QUOTA_ON(mp))
1307 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
1308 else
1309 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
1310#endif
1234 1311
1235 /* 1312 /*
1236 * Now we are mounted, reserve a small amount of unused space for 1313 * Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
1279 __uint64_t resblks; 1356 __uint64_t resblks;
1280 int error; 1357 int error;
1281 1358
1282 /* 1359 xfs_qm_unmount_quotas(mp);
1283 * Release dquot that rootinode, rbmino and rsumino might be holding,
1284 * and release the quota inodes.
1285 */
1286 XFS_QM_UNMOUNT(mp);
1287
1288 xfs_rtunmount_inodes(mp); 1360 xfs_rtunmount_inodes(mp);
1289 IRELE(mp->m_rootip); 1361 IRELE(mp->m_rootip);
1290 1362
@@ -1299,12 +1371,9 @@ xfs_unmountfs(
1299 * need to force the log first. 1371 * need to force the log first.
1300 */ 1372 */
1301 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1373 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1302 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); 1374 xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
1303
1304 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1305 1375
1306 if (mp->m_quotainfo) 1376 xfs_qm_unmount(mp);
1307 XFS_QM_DONE(mp);
1308 1377
1309 /* 1378 /*
1310 * Flush out the log synchronously so that we know for sure 1379 * Flush out the log synchronously so that we know for sure
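Condensed, the mount-time policy that replaces XFS_QM_INIT/XFS_QM_MOUNT is: if quota was requested on this mount, defer to xfs_qm_newmount()/xfs_qm_mount_quotas(); if it was not, but the superblock still carries accounting flags from an earlier quota mount, clear them so a later quota mount forces a fresh quotacheck. A userspace model of just that decision (the flag names are invented for the sketch):

#include <stdio.h>

#define OPT_QUOTA_RUNNING 0x1   /* -o uquota/gquota/pquota was given */
#define SB_QUOTA_ACCT     0x2   /* superblock: quota accounting was on */

static int mount_quota_decision(unsigned opts, unsigned *sb_qflags)
{
    if (opts & OPT_QUOTA_RUNNING)
        return 1;                       /* take the xfs_qm_newmount() path */

    if (*sb_qflags & SB_QUOTA_ACCT) {
        printf("resetting qflags\n");   /* xfs_mount_reset_sbqflags() */
        *sb_qflags = 0;                 /* revokes the quotachecked state */
    }
    return 0;
}

int main(void)
{
    unsigned sb = SB_QUOTA_ACCT;
    mount_quota_decision(0, &sb);       /* mounted without quota options */
    return sb;                          /* now 0: next quota mount rechecks */
}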
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a64392f983..a5122382afde 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
64struct xfs_mru_cache; 64struct xfs_mru_cache;
65struct xfs_nameops; 65struct xfs_nameops;
66struct xfs_ail; 66struct xfs_ail;
67struct xfs_quotainfo;
68
67 69
68/* 70/*
69 * Prototypes and functions for the Data Migration subsystem. 71 * Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
107 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) 109 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
108 110
109 111
110/*
111 * Prototypes and functions for the Quota Management subsystem.
112 */
113
114struct xfs_dquot;
115struct xfs_dqtrxops;
116struct xfs_quotainfo;
117
118typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
119typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
120typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
121typedef void (*xfs_qmdone_t)(struct xfs_mount *);
122typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
123typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
124typedef void (*xfs_dqdetach_t)(struct xfs_inode *);
125typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
126typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *,
127 struct xfs_inode *, uid_t, gid_t, prid_t, uint,
128 struct xfs_dquot **, struct xfs_dquot **);
129typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
130 struct xfs_dquot *, struct xfs_dquot *);
131typedef int (*xfs_dqvoprename_t)(struct xfs_inode **);
132typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
133 struct xfs_trans *, struct xfs_inode *,
134 struct xfs_dquot **, struct xfs_dquot *);
135typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
136 struct xfs_dquot *, struct xfs_dquot *, uint);
137typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
138typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
139
140typedef struct xfs_qmops {
141 xfs_qminit_t xfs_qminit;
142 xfs_qmdone_t xfs_qmdone;
143 xfs_qmmount_t xfs_qmmount;
144 xfs_qmunmount_t xfs_qmunmount;
145 xfs_dqrele_t xfs_dqrele;
146 xfs_dqattach_t xfs_dqattach;
147 xfs_dqdetach_t xfs_dqdetach;
148 xfs_dqpurgeall_t xfs_dqpurgeall;
149 xfs_dqvopalloc_t xfs_dqvopalloc;
150 xfs_dqvopcreate_t xfs_dqvopcreate;
151 xfs_dqvoprename_t xfs_dqvoprename;
152 xfs_dqvopchown_t xfs_dqvopchown;
153 xfs_dqvopchownresv_t xfs_dqvopchownresv;
154 xfs_dqstatvfs_t xfs_dqstatvfs;
155 xfs_dqsync_t xfs_dqsync;
156 struct xfs_dqtrxops *xfs_dqtrxops;
157} xfs_qmops_t;
158
159#define XFS_QM_INIT(mp, mnt, fl) \
160 (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
161#define XFS_QM_MOUNT(mp, mnt, fl) \
162 (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
163#define XFS_QM_UNMOUNT(mp) \
164 (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
165#define XFS_QM_DONE(mp) \
166 (*(mp)->m_qm_ops->xfs_qmdone)(mp)
167#define XFS_QM_DQRELE(mp, dq) \
168 (*(mp)->m_qm_ops->xfs_dqrele)(dq)
169#define XFS_QM_DQATTACH(mp, ip, fl) \
170 (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
171#define XFS_QM_DQDETACH(mp, ip) \
172 (*(mp)->m_qm_ops->xfs_dqdetach)(ip)
173#define XFS_QM_DQPURGEALL(mp, fl) \
174 (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
175#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
176 (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
177#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
178 (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
179#define XFS_QM_DQVOPRENAME(mp, ip) \
180 (*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
181#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
182 (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
183#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
184 (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
185#define XFS_QM_DQSTATVFS(ip, statp) \
186 (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
187#define XFS_QM_DQSYNC(mp, flags) \
188 (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
189
190#ifdef HAVE_PERCPU_SB 112#ifdef HAVE_PERCPU_SB
191 113
192/* 114/*
@@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
510 432
511extern int xfs_dmops_get(struct xfs_mount *); 433extern int xfs_dmops_get(struct xfs_mount *);
512extern void xfs_dmops_put(struct xfs_mount *); 434extern void xfs_dmops_put(struct xfs_mount *);
513extern int xfs_qmops_get(struct xfs_mount *);
514extern void xfs_qmops_put(struct xfs_mount *);
515 435
516extern struct xfs_dmops xfs_dmcore_xfs; 436extern struct xfs_dmops xfs_dmcore_xfs;
517 437
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790ea8e7..000000000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h"
29#include "xfs_quota.h"
30#include "xfs_error.h"
31
32
33STATIC struct xfs_dquot *
34xfs_dqvopchown_default(
35 struct xfs_trans *tp,
36 struct xfs_inode *ip,
37 struct xfs_dquot **dqp,
38 struct xfs_dquot *dq)
39{
40 return NULL;
41}
42
43/*
44 * Clear the quotaflags in memory and in the superblock.
45 */
46int
47xfs_mount_reset_sbqflags(xfs_mount_t *mp)
48{
49 int error;
50 xfs_trans_t *tp;
51
52 mp->m_qflags = 0;
53 /*
54 * It is OK to look at sb_qflags here in mount path,
55 * without m_sb_lock.
56 */
57 if (mp->m_sb.sb_qflags == 0)
58 return 0;
59 spin_lock(&mp->m_sb_lock);
60 mp->m_sb.sb_qflags = 0;
61 spin_unlock(&mp->m_sb_lock);
62
63 /*
64 * if the fs is readonly, let the incore superblock run
65 * with quotas off but don't flush the update out to disk
66 */
67 if (mp->m_flags & XFS_MOUNT_RDONLY)
68 return 0;
69#ifdef QUOTADEBUG
70 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
71#endif
72 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
73 if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
74 XFS_DEFAULT_LOG_COUNT))) {
75 xfs_trans_cancel(tp, 0);
76 xfs_fs_cmn_err(CE_ALERT, mp,
77 "xfs_mount_reset_sbqflags: Superblock update failed!");
78 return error;
79 }
80 xfs_mod_sb(tp, XFS_SB_QFLAGS);
81 error = xfs_trans_commit(tp, 0);
82 return error;
83}
84
85STATIC int
86xfs_noquota_init(
87 xfs_mount_t *mp,
88 uint *needquotamount,
89 uint *quotaflags)
90{
91 int error = 0;
92
93 *quotaflags = 0;
94 *needquotamount = B_FALSE;
95
96 ASSERT(!XFS_IS_QUOTA_ON(mp));
97
98 /*
99 * If a file system had quotas running earlier, but decided to
100 * mount without -o uquota/pquota/gquota options, revoke the
101 * quotachecked license.
102 */
103 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
104 cmn_err(CE_NOTE,
105 "XFS resetting qflags for filesystem %s",
106 mp->m_fsname);
107
108 error = xfs_mount_reset_sbqflags(mp);
109 }
110 return error;
111}
112
113static struct xfs_qmops xfs_qmcore_stub = {
114 .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
115 .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
116 .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
117 .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
118 .xfs_dqrele = (xfs_dqrele_t) fs_noerr,
119 .xfs_dqattach = (xfs_dqattach_t) fs_noerr,
120 .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr,
121 .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr,
122 .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr,
123 .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr,
124 .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr,
125 .xfs_dqvopchown = xfs_dqvopchown_default,
126 .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr,
127 .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval,
128 .xfs_dqsync = (xfs_dqsync_t) fs_noerr,
129};
130
131int
132xfs_qmops_get(struct xfs_mount *mp)
133{
134 if (XFS_IS_QUOTA_RUNNING(mp)) {
135#ifdef CONFIG_XFS_QUOTA
136 mp->m_qm_ops = &xfs_qmcore_xfs;
137#else
138 cmn_err(CE_WARN,
139 "XFS: qouta support not available in this kernel.");
140 return EINVAL;
141#endif
142 } else {
143 mp->m_qm_ops = &xfs_qmcore_stub;
144 }
145
146 return 0;
147}
148
149void
150xfs_qmops_put(struct xfs_mount *mp)
151{
152}
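The whole indirection table above disappears because there is only ever one quota implementation; the new xfs_quota.h (next hunk) replaces it with direct calls under CONFIG_XFS_QUOTA and no-op stubs otherwise. The shape of that pattern, reduced to a compilable toy (WITH_QUOTA stands in for the kernel's CONFIG_XFS_QUOTA switch):

#include <stdio.h>

/* build with -DWITH_QUOTA for the real calls, without it for the stubs */
#ifdef WITH_QUOTA
static int qm_dqattach(int inode, unsigned flags)
{
    printf("attaching dquots to inode %d\n", inode);
    return 0;
}
#else
# define qm_dqattach(ip, flags) (0)     /* quota off: succeed, do nothing */
#endif

int main(void)
{
    return qm_dqattach(42, 0);
}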
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202dde25..3ec91ac74c2a 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
197#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ 197#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
198#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ 198#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
199#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 199#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
200#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */
201#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 200#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
202#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 201#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
203#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 202#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,79 @@ typedef struct xfs_dqtrx {
302 long qt_delrtb_delta; /* delayed RT blk count changes */ 301 long qt_delrtb_delta; /* delayed RT blk count changes */
303} xfs_dqtrx_t; 302} xfs_dqtrx_t;
304 303
305/* 304#ifdef CONFIG_XFS_QUOTA
306 * Dquot transaction functions, used if quota is enabled. 305extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
307 */ 306extern void xfs_trans_free_dqinfo(struct xfs_trans *);
308typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *); 307extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
309typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *, 308 uint, long);
310 struct xfs_inode *, uint, long); 309extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
311typedef void (*qo_free_dqinfo_t)(struct xfs_trans *); 310extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
312typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *); 311extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
313typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *); 312 struct xfs_inode *, long, long, uint);
314typedef int (*qo_reserve_quota_nblks_t)( 313extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
315 struct xfs_trans *, struct xfs_mount *, 314 struct xfs_mount *, struct xfs_dquot *,
316 struct xfs_inode *, long, long, uint); 315 struct xfs_dquot *, long, long, uint);
317typedef int (*qo_reserve_quota_bydquots_t)( 316
318 struct xfs_trans *, struct xfs_mount *, 317extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
319 struct xfs_dquot *, struct xfs_dquot *, 318 struct xfs_dquot **, struct xfs_dquot **);
320 long, long, uint); 319extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
321typedef struct xfs_dqtrxops { 320 struct xfs_dquot *, struct xfs_dquot *);
322 qo_dup_dqinfo_t qo_dup_dqinfo; 321extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
323 qo_free_dqinfo_t qo_free_dqinfo; 322extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
324 qo_mod_dquot_byino_t qo_mod_dquot_byino; 323 struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
325 qo_apply_dquot_deltas_t qo_apply_dquot_deltas; 324extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
326 qo_reserve_quota_nblks_t qo_reserve_quota_nblks; 325 struct xfs_dquot *, struct xfs_dquot *, uint);
327 qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots; 326extern int xfs_qm_dqattach(struct xfs_inode *, uint);
328 qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots; 327extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
329} xfs_dqtrxops_t; 328extern void xfs_qm_dqdetach(struct xfs_inode *);
330 329extern void xfs_qm_dqrele(struct xfs_dquot *);
331#define XFS_DQTRXOP(mp, tp, op, args...) \ 330extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
332 ((mp)->m_qm_ops->xfs_dqtrxops ? \ 331extern int xfs_qm_sync(struct xfs_mount *, int);
333 ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0) 332extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
334 333extern void xfs_qm_mount_quotas(struct xfs_mount *);
335#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \ 334extern void xfs_qm_unmount(struct xfs_mount *);
336 ((mp)->m_qm_ops->xfs_dqtrxops ? \ 335extern void xfs_qm_unmount_quotas(struct xfs_mount *);
337 ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0) 336
338 337#else
339#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \ 338static inline int
340 XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp) 339xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
341#define XFS_TRANS_FREE_DQINFO(mp, tp) \ 340 uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
342 XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo) 341{
343#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \ 342 *udqp = NULL;
344 XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta) 343 *gdqp = NULL;
345#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \ 344 return 0;
346 XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas) 345}
347#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \ 346#define xfs_trans_dup_dqinfo(tp, tp2)
348 XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl) 347#define xfs_trans_free_dqinfo(tp)
349#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \ 348#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
350 XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl) 349#define xfs_trans_apply_dquot_deltas(tp)
351#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ 350#define xfs_trans_unreserve_and_mod_dquots(tp)
352 XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) 351#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
353 352#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
354#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ 353#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
355 XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) 354#define xfs_qm_vop_rename_dqattach(it) (0)
356#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ 355#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
357 XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ 356#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0)
358 f | XFS_QMOPT_RES_REGBLKS) 357#define xfs_qm_dqattach(ip, fl) (0)
359#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ 358#define xfs_qm_dqattach_locked(ip, fl) (0)
360 XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \ 359#define xfs_qm_dqdetach(ip)
360#define xfs_qm_dqrele(d)
361#define xfs_qm_statvfs(ip, s)
362#define xfs_qm_sync(mp, fl) (0)
363#define xfs_qm_newmount(mp, a, b) (0)
364#define xfs_qm_mount_quotas(mp)
365#define xfs_qm_unmount(mp)
366#define xfs_qm_unmount_quotas(mp) (0)
367#endif /* CONFIG_XFS_QUOTA */
368
369#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
370 xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
371#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
372 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
361 f | XFS_QMOPT_RES_REGBLKS) 373 f | XFS_QMOPT_RES_REGBLKS)
362 374
363extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 375extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
364extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 376extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
365 377
366extern struct xfs_qmops xfs_qmcore_xfs;
367
368#endif /* __KERNEL__ */ 378#endif /* __KERNEL__ */
369
370#endif /* __XFS_QUOTA_H__ */ 379#endif /* __XFS_QUOTA_H__ */
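
The rewritten header uses the standard kernel compile-time pattern instead: real prototypes under CONFIG_XFS_QUOTA, and no-op macros or trivial static inlines otherwise, so callers compile unchanged either way and the disabled case costs nothing at runtime. A condensed, compilable sketch of the idiom (hypothetical myfs_ names, with CONFIG_MYFS_QUOTA standing in for CONFIG_XFS_QUOTA):

    struct myfs_inode;

    #ifdef CONFIG_MYFS_QUOTA
    int myfs_qm_dqattach(struct myfs_inode *ip, unsigned int flags);
    #else
    /* Quota compiled out: the call evaluates to 0 and emits no code. */
    #define myfs_qm_dqattach(ip, flags)     (0)
    #endif

    int myfs_prepare_write(struct myfs_inode *ip)
    {
            int error = myfs_qm_dqattach(ip, 0);    /* direct call, no ops table */
            if (error)
                    return error;
            return 0;
    }

One wrinkle of the bare-macro style used here: a stub like #define xfs_qm_dqrele(d) silently discards its argument, so it only stays safe while every call site invokes it as a plain statement with a side-effect-free argument.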
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9cd11d..b81deea0ce19 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
166 /* 166 /*
167 * Attach the dquots to the inodes 167 * Attach the dquots to the inodes
168 */ 168 */
169 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { 169 error = xfs_qm_vop_rename_dqattach(inodes);
170 if (error) {
170 xfs_trans_cancel(tp, cancel_flags); 171 xfs_trans_cancel(tp, cancel_flags);
171 goto std_return; 172 goto std_return;
172 } 173 }
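
Besides dropping the XFS_QM_DQVOPRENAME macro, this hunk also splits the assignment out of the if, matching the kernel style the rest of the series converts to. The same change, restated:

    /* Old: assignment buried in the condition. */
    if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
            xfs_trans_cancel(tp, cancel_flags);
            goto std_return;
    }

    /* New: direct call; assign, then test. */
    error = xfs_qm_vop_rename_dqattach(inodes);
    if (error) {
            xfs_trans_cancel(tp, cancel_flags);
            goto std_return;
    }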
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21c54d2..fea68615ed23 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
41#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
42#include "xfs_attr.h" 42#include "xfs_attr.h"
43#include "xfs_bmap.h" 43#include "xfs_bmap.h"
44#include "xfs_acl.h"
45#include "xfs_error.h" 44#include "xfs_error.h"
46#include "xfs_buf_item.h" 45#include "xfs_buf_item.h"
47#include "xfs_rw.h" 46#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bcc39d358ad3..66b849358e62 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
297 tp->t_rtx_res = tp->t_rtx_res_used; 297 tp->t_rtx_res = tp->t_rtx_res_used;
298 ntp->t_pflags = tp->t_pflags; 298 ntp->t_pflags = tp->t_pflags;
299 299
300 XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); 300 xfs_trans_dup_dqinfo(tp, ntp);
301 301
302 atomic_inc(&tp->t_mountp->m_active_trans); 302 atomic_inc(&tp->t_mountp->m_active_trans);
303 return ntp; 303 return ntp;
@@ -829,7 +829,7 @@ shut_us_down:
829 * means is that we have some (non-persistent) quota 829 * means is that we have some (non-persistent) quota
830 * reservations that need to be unreserved. 830 * reservations that need to be unreserved.
831 */ 831 */
832 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); 832 xfs_trans_unreserve_and_mod_dquots(tp);
833 if (tp->t_ticket) { 833 if (tp->t_ticket) {
834 commit_lsn = xfs_log_done(mp, tp->t_ticket, 834 commit_lsn = xfs_log_done(mp, tp->t_ticket,
835 NULL, log_flags); 835 NULL, log_flags);
@@ -848,10 +848,9 @@ shut_us_down:
848 /* 848 /*
849 * If we need to update the superblock, then do it now. 849 * If we need to update the superblock, then do it now.
850 */ 850 */
851 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 851 if (tp->t_flags & XFS_TRANS_SB_DIRTY)
852 xfs_trans_apply_sb_deltas(tp); 852 xfs_trans_apply_sb_deltas(tp);
853 } 853 xfs_trans_apply_dquot_deltas(tp);
854 XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
855 854
856 /* 855 /*
857 * Ask each log item how many log_vector entries it will 856 * Ask each log item how many log_vector entries it will
@@ -1056,7 +1055,7 @@ xfs_trans_uncommit(
1056 } 1055 }
1057 1056
1058 xfs_trans_unreserve_and_mod_sb(tp); 1057 xfs_trans_unreserve_and_mod_sb(tp);
1059 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp); 1058 xfs_trans_unreserve_and_mod_dquots(tp);
1060 1059
1061 xfs_trans_free_items(tp, flags); 1060 xfs_trans_free_items(tp, flags);
1062 xfs_trans_free_busy(tp); 1061 xfs_trans_free_busy(tp);
@@ -1181,7 +1180,7 @@ xfs_trans_cancel(
1181 } 1180 }
1182#endif 1181#endif
1183 xfs_trans_unreserve_and_mod_sb(tp); 1182 xfs_trans_unreserve_and_mod_sb(tp);
1184 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); 1183 xfs_trans_unreserve_and_mod_dquots(tp);
1185 1184
1186 if (tp->t_ticket) { 1185 if (tp->t_ticket) {
1187 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1186 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1211,7 +1210,7 @@ xfs_trans_free(
1211 xfs_trans_t *tp) 1210 xfs_trans_t *tp)
1212{ 1211{
1213 atomic_dec(&tp->t_mountp->m_active_trans); 1212 atomic_dec(&tp->t_mountp->m_active_trans);
1214 XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); 1213 xfs_trans_free_dqinfo(tp);
1215 kmem_zone_free(xfs_trans_zone, tp); 1214 kmem_zone_free(xfs_trans_zone, tp);
1216} 1215}
1217 1216
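
All five xfs_trans.c call sites are mechanical rewrites: the old XFS_TRANS_* wrappers dispatched through the per-mount ops table at runtime, while the new calls bind at compile time. The before/after shape, lifted from the macros removed in xfs_quota.h above:

    /* Before: runtime test of the ops vector on every call. */
    #define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
            ((mp)->m_qm_ops->xfs_dqtrxops ? \
             ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)

    XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);

    /* After: direct call; the CONFIG_XFS_QUOTA=n stub compiles to nothing. */
    xfs_trans_unreserve_and_mod_dquots(tp);

Note that the mp argument disappears everywhere: the callee can reach the mount through the transaction, so it no longer needs to be threaded through each macro.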
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5ea5359..4d88616bde91 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
166 xfs_buf_relse(ialloc_context); 166 xfs_buf_relse(ialloc_context);
167 if (dqinfo) { 167 if (dqinfo) {
168 tp->t_dqinfo = dqinfo; 168 tp->t_dqinfo = dqinfo;
169 XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); 169 xfs_trans_free_dqinfo(tp);
170 } 170 }
171 *tpp = ntp; 171 *tpp = ntp;
172 *ipp = NULL; 172 *ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a9c762..c4eca5ed5dab 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
42#include "xfs_ialloc.h" 42#include "xfs_ialloc.h"
43#include "xfs_alloc.h" 43#include "xfs_alloc.h"
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_acl.h"
45#include "xfs_attr.h" 46#include "xfs_attr.h"
46#include "xfs_rw.h" 47#include "xfs_rw.h"
47#include "xfs_error.h" 48#include "xfs_error.h"
@@ -118,7 +119,7 @@ xfs_setattr(
118 */ 119 */
119 ASSERT(udqp == NULL); 120 ASSERT(udqp == NULL);
120 ASSERT(gdqp == NULL); 121 ASSERT(gdqp == NULL);
121 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid, 122 code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
122 qflags, &udqp, &gdqp); 123 qflags, &udqp, &gdqp);
123 if (code) 124 if (code)
124 return code; 125 return code;
@@ -180,10 +181,11 @@ xfs_setattr(
180 * Do a quota reservation only if uid/gid is actually 181 * Do a quota reservation only if uid/gid is actually
181 * going to change. 182 * going to change.
182 */ 183 */
183 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 184 if (XFS_IS_QUOTA_RUNNING(mp) &&
184 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 185 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
186 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
185 ASSERT(tp); 187 ASSERT(tp);
186 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 188 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
187 capable(CAP_FOWNER) ? 189 capable(CAP_FOWNER) ?
188 XFS_QMOPT_FORCE_RES : 0); 190 XFS_QMOPT_FORCE_RES : 0);
189 if (code) /* out of quota */ 191 if (code) /* out of quota */
@@ -217,7 +219,7 @@ xfs_setattr(
217 /* 219 /*
218 * Make sure that the dquots are attached to the inode. 220 * Make sure that the dquots are attached to the inode.
219 */ 221 */
220 code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); 222 code = xfs_qm_dqattach_locked(ip, 0);
221 if (code) 223 if (code)
222 goto error_return; 224 goto error_return;
223 225
@@ -351,21 +353,21 @@ xfs_setattr(
351 * in the transaction. 353 * in the transaction.
352 */ 354 */
353 if (iuid != uid) { 355 if (iuid != uid) {
354 if (XFS_IS_UQUOTA_ON(mp)) { 356 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
355 ASSERT(mask & ATTR_UID); 357 ASSERT(mask & ATTR_UID);
356 ASSERT(udqp); 358 ASSERT(udqp);
357 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 359 olddquot1 = xfs_qm_vop_chown(tp, ip,
358 &ip->i_udquot, udqp); 360 &ip->i_udquot, udqp);
359 } 361 }
360 ip->i_d.di_uid = uid; 362 ip->i_d.di_uid = uid;
361 inode->i_uid = uid; 363 inode->i_uid = uid;
362 } 364 }
363 if (igid != gid) { 365 if (igid != gid) {
364 if (XFS_IS_GQUOTA_ON(mp)) { 366 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
365 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 367 ASSERT(!XFS_IS_PQUOTA_ON(mp));
366 ASSERT(mask & ATTR_GID); 368 ASSERT(mask & ATTR_GID);
367 ASSERT(gdqp); 369 ASSERT(gdqp);
368 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 370 olddquot2 = xfs_qm_vop_chown(tp, ip,
369 &ip->i_gdquot, gdqp); 371 &ip->i_gdquot, gdqp);
370 } 372 }
371 ip->i_d.di_gid = gid; 373 ip->i_d.di_gid = gid;
@@ -461,13 +463,25 @@ xfs_setattr(
461 /* 463 /*
462 * Release any dquot(s) the inode had kept before chown. 464 * Release any dquot(s) the inode had kept before chown.
463 */ 465 */
464 XFS_QM_DQRELE(mp, olddquot1); 466 xfs_qm_dqrele(olddquot1);
465 XFS_QM_DQRELE(mp, olddquot2); 467 xfs_qm_dqrele(olddquot2);
466 XFS_QM_DQRELE(mp, udqp); 468 xfs_qm_dqrele(udqp);
467 XFS_QM_DQRELE(mp, gdqp); 469 xfs_qm_dqrele(gdqp);
468 470
469 if (code) { 471 if (code)
470 return code; 472 return code;
473
474 /*
475 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
476 * update. We could avoid this with linked transactions
477 * and passing down the transaction pointer all the way
478 * to attr_set. No previous user of the generic
479 * Posix ACL code seems to care about this issue either.
480 */
481 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
482 code = -xfs_acl_chmod(inode);
483 if (code)
484 return XFS_ERROR(code);
471 } 485 }
472 486
473 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 487 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
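
The new tail of xfs_setattr runs xfs_acl_chmod after the transaction commits (unless the caller passed XFS_ATTR_NOACL): a chmod must rewrite the owner/group/other entries of the access ACL so they agree with the new i_mode, which is what the generic POSIX ACL helpers do as well. A simplified model of that sync step (ignoring the ACL_MASK special case; this is not the XFS implementation):

    /* Toy ACL entry; real POSIX ACLs carry more tag types (USER, GROUP, MASK). */
    enum acl_tag { ACL_USER_OBJ_T, ACL_GROUP_OBJ_T, ACL_OTHER_T };
    struct acl_entry { enum acl_tag tag; unsigned short perm; };

    /* Fold the new mode bits into the three mandatory entries (rwx triplets). */
    void acl_chmod_sketch(struct acl_entry *e, int n, unsigned int mode)
    {
            for (int i = 0; i < n; i++) {
                    switch (e[i].tag) {
                    case ACL_USER_OBJ_T:  e[i].perm = (mode >> 6) & 7; break;
                    case ACL_GROUP_OBJ_T: e[i].perm = (mode >> 3) & 7; break;
                    case ACL_OTHER_T:     e[i].perm =  mode       & 7; break;
                    }
            }
    }

As the XXX comment notes, this runs outside the transaction that changed i_mode, so a crash in between can leave the ACL and the mode briefly inconsistent.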
@@ -482,8 +496,8 @@ xfs_setattr(
482 commit_flags |= XFS_TRANS_ABORT; 496 commit_flags |= XFS_TRANS_ABORT;
483 /* FALLTHROUGH */ 497 /* FALLTHROUGH */
484 error_return: 498 error_return:
485 XFS_QM_DQRELE(mp, udqp); 499 xfs_qm_dqrele(udqp);
486 XFS_QM_DQRELE(mp, gdqp); 500 xfs_qm_dqrele(gdqp);
487 if (tp) { 501 if (tp) {
488 xfs_trans_cancel(tp, commit_flags); 502 xfs_trans_cancel(tp, commit_flags);
489 } 503 }
@@ -739,7 +753,8 @@ xfs_free_eofblocks(
739 /* 753 /*
740 * Attach the dquots to the inode up front. 754 * Attach the dquots to the inode up front.
741 */ 755 */
742 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 756 error = xfs_qm_dqattach(ip, 0);
757 if (error)
743 return error; 758 return error;
744 759
745 /* 760 /*
@@ -1181,7 +1196,8 @@ xfs_inactive(
1181 1196
1182 ASSERT(ip->i_d.di_nlink == 0); 1197 ASSERT(ip->i_d.di_nlink == 0);
1183 1198
1184 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1199 error = xfs_qm_dqattach(ip, 0);
1200 if (error)
1185 return VN_INACTIVE_CACHE; 1201 return VN_INACTIVE_CACHE;
1186 1202
1187 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1203 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1323,7 @@ xfs_inactive(
1307 /* 1323 /*
1308 * Credit the quota account(s). The inode is gone. 1324 * Credit the quota account(s). The inode is gone.
1309 */ 1325 */
1310 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1326 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1311 1327
1312 /* 1328 /*
1313 * Just ignore errors at this point. There is nothing we can 1329 * Just ignore errors at this point. There is nothing we can
@@ -1323,11 +1339,11 @@ xfs_inactive(
1323 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1339 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
1324 "xfs_trans_commit() returned error %d", error); 1340 "xfs_trans_commit() returned error %d", error);
1325 } 1341 }
1342
1326 /* 1343 /*
1327 * Release the dquots held by inode, if any. 1344 * Release the dquots held by inode, if any.
1328 */ 1345 */
1329 XFS_QM_DQDETACH(mp, ip); 1346 xfs_qm_dqdetach(ip);
1330
1331 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1347 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1332 1348
1333 out: 1349 out:
@@ -1427,8 +1443,7 @@ xfs_create(
1427 /* 1443 /*
1428 * Make sure that we have allocated dquot(s) on disk. 1444 * Make sure that we have allocated dquot(s) on disk.
1429 */ 1445 */
1430 error = XFS_QM_DQVOPALLOC(mp, dp, 1446 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1431 current_fsuid(), current_fsgid(), prid,
1432 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1447 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1433 if (error) 1448 if (error)
1434 goto std_return; 1449 goto std_return;
@@ -1489,7 +1504,7 @@ xfs_create(
1489 /* 1504 /*
1490 * Reserve disk quota and the inode. 1505 * Reserve disk quota and the inode.
1491 */ 1506 */
1492 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 1507 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
1493 if (error) 1508 if (error)
1494 goto out_trans_cancel; 1509 goto out_trans_cancel;
1495 1510
@@ -1561,7 +1576,7 @@ xfs_create(
1561 * These ids of the inode couldn't have changed since the new 1576 * These ids of the inode couldn't have changed since the new
1562 * inode has been locked ever since it was created. 1577 * inode has been locked ever since it was created.
1563 */ 1578 */
1564 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 1579 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1565 1580
1566 /* 1581 /*
1567 * xfs_trans_commit normally decrements the vnode ref count 1582 * xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1595,8 @@ xfs_create(
1580 goto out_dqrele; 1595 goto out_dqrele;
1581 } 1596 }
1582 1597
1583 XFS_QM_DQRELE(mp, udqp); 1598 xfs_qm_dqrele(udqp);
1584 XFS_QM_DQRELE(mp, gdqp); 1599 xfs_qm_dqrele(gdqp);
1585 1600
1586 *ipp = ip; 1601 *ipp = ip;
1587 1602
@@ -1602,8 +1617,8 @@ xfs_create(
1602 out_trans_cancel: 1617 out_trans_cancel:
1603 xfs_trans_cancel(tp, cancel_flags); 1618 xfs_trans_cancel(tp, cancel_flags);
1604 out_dqrele: 1619 out_dqrele:
1605 XFS_QM_DQRELE(mp, udqp); 1620 xfs_qm_dqrele(udqp);
1606 XFS_QM_DQRELE(mp, gdqp); 1621 xfs_qm_dqrele(gdqp);
1607 1622
1608 if (unlock_dp_on_error) 1623 if (unlock_dp_on_error)
1609 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1624 xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1852,11 @@ xfs_remove(
1837 return error; 1852 return error;
1838 } 1853 }
1839 1854
1840 error = XFS_QM_DQATTACH(mp, dp, 0); 1855 error = xfs_qm_dqattach(dp, 0);
1841 if (error) 1856 if (error)
1842 goto std_return; 1857 goto std_return;
1843 1858
1844 error = XFS_QM_DQATTACH(mp, ip, 0); 1859 error = xfs_qm_dqattach(ip, 0);
1845 if (error) 1860 if (error)
1846 goto std_return; 1861 goto std_return;
1847 1862
@@ -2028,11 +2043,11 @@ xfs_link(
2028 2043
2029 /* Return through std_return after this point. */ 2044 /* Return through std_return after this point. */
2030 2045
2031 error = XFS_QM_DQATTACH(mp, sip, 0); 2046 error = xfs_qm_dqattach(sip, 0);
2032 if (error) 2047 if (error)
2033 goto std_return; 2048 goto std_return;
2034 2049
2035 error = XFS_QM_DQATTACH(mp, tdp, 0); 2050 error = xfs_qm_dqattach(tdp, 0);
2036 if (error) 2051 if (error)
2037 goto std_return; 2052 goto std_return;
2038 2053
@@ -2205,8 +2220,7 @@ xfs_symlink(
2205 /* 2220 /*
2206 * Make sure that we have allocated dquot(s) on disk. 2221 * Make sure that we have allocated dquot(s) on disk.
2207 */ 2222 */
2208 error = XFS_QM_DQVOPALLOC(mp, dp, 2223 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
2209 current_fsuid(), current_fsgid(), prid,
2210 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2224 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2211 if (error) 2225 if (error)
2212 goto std_return; 2226 goto std_return;
@@ -2248,7 +2262,7 @@ xfs_symlink(
2248 /* 2262 /*
2249 * Reserve disk quota : blocks and inode. 2263 * Reserve disk quota : blocks and inode.
2250 */ 2264 */
2251 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2265 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
2252 if (error) 2266 if (error)
2253 goto error_return; 2267 goto error_return;
2254 2268
@@ -2288,7 +2302,7 @@ xfs_symlink(
2288 /* 2302 /*
2289 * Also attach the dquot(s) to it, if applicable. 2303 * Also attach the dquot(s) to it, if applicable.
2290 */ 2304 */
2291 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 2305 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
2292 2306
2293 if (resblks) 2307 if (resblks)
2294 resblks -= XFS_IALLOC_SPACE_RES(mp); 2308 resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2390,8 @@ xfs_symlink(
2376 goto error2; 2390 goto error2;
2377 } 2391 }
2378 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2392 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2379 XFS_QM_DQRELE(mp, udqp); 2393 xfs_qm_dqrele(udqp);
2380 XFS_QM_DQRELE(mp, gdqp); 2394 xfs_qm_dqrele(gdqp);
2381 2395
2382 /* Fall through to std_return with error = 0 or errno from 2396 /* Fall through to std_return with error = 0 or errno from
2383 * xfs_trans_commit */ 2397 * xfs_trans_commit */
@@ -2401,8 +2415,8 @@ std_return:
2401 cancel_flags |= XFS_TRANS_ABORT; 2415 cancel_flags |= XFS_TRANS_ABORT;
2402 error_return: 2416 error_return:
2403 xfs_trans_cancel(tp, cancel_flags); 2417 xfs_trans_cancel(tp, cancel_flags);
2404 XFS_QM_DQRELE(mp, udqp); 2418 xfs_qm_dqrele(udqp);
2405 XFS_QM_DQRELE(mp, gdqp); 2419 xfs_qm_dqrele(gdqp);
2406 2420
2407 if (unlock_dp_on_error) 2421 if (unlock_dp_on_error)
2408 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2422 xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2555,8 @@ xfs_alloc_file_space(
2541 if (XFS_FORCED_SHUTDOWN(mp)) 2555 if (XFS_FORCED_SHUTDOWN(mp))
2542 return XFS_ERROR(EIO); 2556 return XFS_ERROR(EIO);
2543 2557
2544 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 2558 error = xfs_qm_dqattach(ip, 0);
2559 if (error)
2545 return error; 2560 return error;
2546 2561
2547 if (len <= 0) 2562 if (len <= 0)
@@ -2628,8 +2643,8 @@ retry:
2628 break; 2643 break;
2629 } 2644 }
2630 xfs_ilock(ip, XFS_ILOCK_EXCL); 2645 xfs_ilock(ip, XFS_ILOCK_EXCL);
2631 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 2646 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
2632 qblocks, 0, quota_flag); 2647 0, quota_flag);
2633 if (error) 2648 if (error)
2634 goto error1; 2649 goto error1;
2635 2650
@@ -2688,7 +2703,7 @@ dmapi_enospc_check:
2688 2703
2689error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 2704error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
2690 xfs_bmap_cancel(&free_list); 2705 xfs_bmap_cancel(&free_list);
2691 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 2706 xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
2692 2707
2693error1: /* Just cancel transaction */ 2708error1: /* Just cancel transaction */
2694 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2709 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2842,8 @@ xfs_free_file_space(
2827 2842
2828 xfs_itrace_entry(ip); 2843 xfs_itrace_entry(ip);
2829 2844
2830 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 2845 error = xfs_qm_dqattach(ip, 0);
2846 if (error)
2831 return error; 2847 return error;
2832 2848
2833 error = 0; 2849 error = 0;
@@ -2953,9 +2969,9 @@ xfs_free_file_space(
2953 break; 2969 break;
2954 } 2970 }
2955 xfs_ilock(ip, XFS_ILOCK_EXCL); 2971 xfs_ilock(ip, XFS_ILOCK_EXCL);
2956 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 2972 error = xfs_trans_reserve_quota(tp, mp,
2957 ip->i_udquot, ip->i_gdquot, resblks, 0, 2973 ip->i_udquot, ip->i_gdquot,
2958 XFS_QMOPT_RES_REGBLKS); 2974 resblks, 0, XFS_QMOPT_RES_REGBLKS);
2959 if (error) 2975 if (error)
2960 goto error1; 2976 goto error1;
2961 2977
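
A detail worth noting in these last hunks: there is no separate unreserve primitive. xfs_trans_unreserve_quota_nblks (added to xfs_quota.h above) is the reserve call with negated counts, so the error path in xfs_alloc_file_space gives blocks back by reserving -qblocks. A toy model of that sign convention:

    #include <assert.h>

    /* Toy quota pool; reserve with a negative count to unreserve. */
    static long reserved;

    static int reserve_nblks(long nblks)
    {
            if (reserved + nblks > 100)     /* pretend the hard limit is 100 */
                    return -1;              /* stand-in for EDQUOT */
            reserved += nblks;
            return 0;
    }

    #define unreserve_nblks(nblks)  reserve_nblks(-(nblks))

    int main(void)
    {
            assert(reserve_nblks(40) == 0);
            assert(unreserve_nblks(40) == 0);   /* error path: give it back */
            assert(reserved == 0);
            return 0;
    }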
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6c61ff..a9e102de71a1 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
18#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ 18#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
19#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 19#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
20#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 20#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
21#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
21 22
22int xfs_readlink(struct xfs_inode *ip, char *link); 23int xfs_readlink(struct xfs_inode *ip, char *link);
23int xfs_fsync(struct xfs_inode *ip); 24int xfs_fsync(struct xfs_inode *ip);
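
The new XFS_ATTR_NOACL flag closes a recursion loop: the ACL set path itself changes i_mode through xfs_setattr, and without the flag that call would re-enter xfs_acl_chmod. Roughly how such a caller would use it (a sketch, not quoted from the new xfs_acl.c):

    /* Mode update on behalf of an ACL change: skip the ACL resync. */
    struct iattr iattr;

    iattr.ia_valid = ATTR_MODE;
    iattr.ia_mode = mode;
    error = xfs_setattr(ip, &iattr, XFS_ATTR_NOACL);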