Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6: (87 commits)
  [XFS] Fix merge failure
  [XFS] The forward declarations for the xfs_ioctl() helpers and the
  [XFS] Update XFS documentation for noikeep/ikeep.
  [XFS] Update XFS Documentation for ikeep and ihashsize
  [XFS] Remove unused HAVE_SPLICE macro.
  [XFS] Remove CONFIG_XFS_SECURITY.
  [XFS] xfs_bmap_compute_maxlevels should be based on di_forkoff
  [XFS] Always use di_forkoff when checking for attr space.
  [XFS] Ensure the inode is joined in xfs_itruncate_finish
  [XFS] Remove periodic logging of in-core superblock counters.
  [XFS] fix logic error in xfs_alloc_ag_vextent_near()
  [XFS] Don't error out on good I/Os.
  [XFS] Catch log unmount failures.
  [XFS] Sanitise xfs_log_force error checking.
  [XFS] Check for errors when changing buffer pointers.
  [XFS] Don't allow silent errors in xfs_inactive().
  [XFS] Catch errors from xfs_imap().
  [XFS] xfs_bulkstat_one_dinode() never returns an error.
  [XFS] xfs_iflush_fork() never returns an error.
  [XFS] Catch unwritten extent conversion errors.
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 11:39:39 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 11:39:39 -0400
commit: 253ba4e79edc695b2925bd2ef34de06ff4d4070c (patch)
tree: 259667140ca702d6a218cc54f4df275fbbda747b
parent: 188da98800893691e47eea9335a234378e32aceb (diff)
parent: 65e67f5165c8a156b34ee7adf65d5ed3b16a910d (diff)
66 files changed, 1907 insertions, 2304 deletions
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 74aeb142ae5f..0a1668ba2600 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -52,16 +52,15 @@ When mounting an XFS filesystem, the following options are accepted.
        and also gets the setgid bit set if it is a directory itself.
  ihashsize=value
-        Sets the number of hash buckets available for hashing the
-        in-memory inodes of the specified mount point.  If a value
-        of zero is used, the value selected by the default algorithm
-        will be displayed in /proc/mounts.
+        In memory inode hashes have been removed, so this option has
+        no function as of August 2007. Option is deprecated.
  ikeep/noikeep
-        When inode clusters are emptied of inodes, keep them around
-        on the disk (ikeep) - this is the traditional XFS behaviour
-        and is still the default for now.  Using the noikeep option,
-        inode clusters are returned to the free space pool.
+        When ikeep is specified, XFS does not delete empty inode clusters
+        and keeps them around on disk. ikeep is the traditional XFS
+        behaviour. When noikeep is specified, empty inode clusters
+        are returned to the free space pool. The default is noikeep for
+        non-DMAPI mounts, while ikeep is the default when DMAPI is in use.
  inode64
        Indicates that XFS is allowed to create inodes at any location
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35115bca036e..524021ff5436 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -35,18 +35,6 @@ config XFS_QUOTA
          with or without the generic quota support enabled (CONFIG_QUOTA) -
          they are completely independent subsystems.
-config XFS_SECURITY
-        bool "XFS Security Label support"
-        depends on XFS_FS
-        help
-          Security labels support alternative access control models
-          implemented by security modules like SELinux.  This option
-          enables an extended attribute namespace for inode security
-          labels in the XFS filesystem.
-          If you are not using a security module that requires using
-          extended attributes for inode security labels, say N.
 config XFS_POSIX_ACL
        bool "XFS POSIX ACL support"
        depends on XFS_FS
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index e040f1ce1b6a..9b1bb17a0501 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -37,7 +37,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 #ifdef DEBUG
        if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
                printk(KERN_WARNING "Large %s attempt, size=%ld\n",
-                        __FUNCTION__, (long)size);
+                        __func__, (long)size);
                dump_stack();
        }
 #endif
@@ -52,7 +52,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
                if (!(++retries % 100))
                        printk(KERN_ERR "XFS: possible memory allocation "
                                        "deadlock in %s (mode:0x%x)\n",
-                                        __FUNCTION__, lflags);
+                                        __func__, lflags);
                congestion_wait(WRITE, HZ/50);
        } while (1);
 }
@@ -129,7 +129,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
                if (!(++retries % 100))
                        printk(KERN_ERR "XFS: possible memory allocation "
                                        "deadlock in %s (mode:0x%x)\n",
-                                        __FUNCTION__, lflags);
+                                        __func__, lflags);
                congestion_wait(WRITE, HZ/50);
        } while (1);
 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index e0519529c26c..a55c3b26d840 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -243,8 +243,12 @@ xfs_end_bio_unwritten(
        size_t                  size = ioend->io_size;
        if (likely(!ioend->io_error)) {
-                if (!XFS_FORCED_SHUTDOWN(ip->i_mount))
+                if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                        xfs_iomap_write_unwritten(ip, offset, size);
+                        int error;
+                        error = xfs_iomap_write_unwritten(ip, offset, size);
+                        if (error)
+                                ioend->io_error = error;
+                }
                xfs_setfilesize(ioend);
        }
        xfs_destroy_ioend(ioend);
@@ -1532,9 +1536,9 @@ xfs_vm_bmap(
        struct xfs_inode        *ip = XFS_I(inode);
        xfs_itrace_entry(XFS_I(inode));
-        xfs_rwlock(ip, VRWLOCK_READ);
+        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-        xfs_rwunlock(ip, VRWLOCK_READ);
+        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
        return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e347bfd47c91..52f6846101d5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -400,7 +400,7 @@ _xfs_buf_lookup_pages(
                                printk(KERN_ERR
                                        "XFS: possible memory allocation "
                                        "deadlock in %s (mode:0x%x)\n",
-                                        __FUNCTION__, gfp_mask);
+                                        __func__, gfp_mask);
                        XFS_STATS_INC(xb_page_retries);
                        xfsbufd_wakeup(0, gfp_mask);
@@ -598,7 +598,7 @@ xfs_buf_get_flags(
                error = _xfs_buf_map_pages(bp, flags);
                if (unlikely(error)) {
                        printk(KERN_WARNING "%s: failed to map pages\n",
-                                        __FUNCTION__);
+                                        __func__);
                        goto no_buffer;
                }
        }
@@ -778,7 +778,7 @@ xfs_buf_get_noaddr(
        error = _xfs_buf_map_pages(bp, XBF_MAPPED);
        if (unlikely(error)) {
                printk(KERN_WARNING "%s: failed to map pages\n",
-                                __FUNCTION__);
+                                __func__);
                goto fail_free_mem;
        }
@@ -1060,7 +1060,7 @@ xfs_buf_iostart(
                bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
                bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
                xfs_buf_delwri_queue(bp, 1);
-                return status;
+                return 0;
        }
        bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a3d207de48b8..841d7883528d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -387,11 +387,15 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
        return error;
 }
-static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
+/*
+ * No error can be returned from xfs_buf_iostart for delwri
+ * buffers as they are queued and no I/O is issued.
+ */
+static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
 {
        bp->b_strat = xfs_bdstrat_cb;
        bp->b_fspriv3 = mp;
-        return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
+        (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
 }
 #define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index e7f3da61c6c3..652721ce0ea5 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -30,7 +30,7 @@ typedef struct cred {
 extern struct cred *sys_cred;
 /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
-static __inline int capable_cred(cred_t *cr, int cid)
+static inline int capable_cred(cred_t *cr, int cid)
 {
        return (cr == sys_cred) ? 1 : capable(cid);
 }
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index ca4f66c4de16..265f0168ab76 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -22,6 +22,7 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_export.h"
@@ -30,8 +31,6 @@
 #include "xfs_inode.h"
 #include "xfs_vfsops.h"
-static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
 /*
 * Note that we only accept fileids which are long enough rather than allow
 * the parent generation number to default to zero.  XFS considers zero a
@@ -66,7 +65,7 @@ xfs_fs_encode_fh(
        int                     len;
        /* Directories don't need their parent encoded, they have ".." */
-        if (S_ISDIR(inode->i_mode))
+        if (S_ISDIR(inode->i_mode) || !connectable)
                fileid_type = FILEID_INO32_GEN;
        else
                fileid_type = FILEID_INO32_GEN_PARENT;
@@ -213,17 +212,16 @@ xfs_fs_get_parent(
        struct dentry           *child)
 {
        int                     error;
-        bhv_vnode_t             *cvp;
+        struct xfs_inode        *cip;
        struct dentry           *parent;
-        cvp = NULL;
+        error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
-        error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp);
        if (unlikely(error))
                return ERR_PTR(-error);
-        parent = d_alloc_anon(vn_to_inode(cvp));
+        parent = d_alloc_anon(cip->i_vnode);
        if (unlikely(!parent)) {
-                VN_RELE(cvp);
+                iput(cip->i_vnode);
                return ERR_PTR(-ENOMEM);
        }
        return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index edab1ffbb163..05905246434d 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -469,16 +469,11 @@ xfs_file_open_exec(
        struct inode    *inode)
 {
        struct xfs_mount *mp = XFS_M(inode->i_sb);
+        struct xfs_inode *ip = XFS_I(inode);
-        if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) {
+        if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
-                if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) {
+                     DM_EVENT_ENABLED(ip, DM_EVENT_READ))
-                        bhv_vnode_t *vp = vn_from_inode(inode);
+                return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
-                        return -XFS_SEND_DATA(mp, DM_EVENT_READ,
-                                                vp, 0, 0, 0, NULL);
-                }
-        }
        return 0;
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index ac6d34cc355d..1eefe61f0e10 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -17,18 +17,7 @@
 */
 #include "xfs.h"
 #include "xfs_vnodeops.h"
-/*
- * The following six includes are needed so that we can include
- * xfs_inode.h.  What a mess..
- */
 #include "xfs_bmap_btree.h"
-#include "xfs_inum.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
 #include "xfs_inode.h"
 int  fs_noerr(void) { return 0; }
@@ -42,11 +31,10 @@ xfs_tosspages(
        xfs_off_t       last,
        int             fiopt)
 {
-        bhv_vnode_t     *vp = XFS_ITOV(ip);
+        struct address_space *mapping = ip->i_vnode->i_mapping;
-        struct inode    *inode = vn_to_inode(vp);
-        if (VN_CACHED(vp))
+        if (mapping->nrpages)
-                truncate_inode_pages(inode->i_mapping, first);
+                truncate_inode_pages(mapping, first);
 }
 int
@@ -56,15 +44,14 @@ xfs_flushinval_pages(
        xfs_off_t       last,
        int             fiopt)
 {
-        bhv_vnode_t     *vp = XFS_ITOV(ip);
+        struct address_space *mapping = ip->i_vnode->i_mapping;
-        struct inode    *inode = vn_to_inode(vp);
        int             ret = 0;
-        if (VN_CACHED(vp)) {
+        if (mapping->nrpages) {
                xfs_iflags_clear(ip, XFS_ITRUNCATED);
-                ret = filemap_write_and_wait(inode->i_mapping);
+                ret = filemap_write_and_wait(mapping);
                if (!ret)
-                        truncate_inode_pages(inode->i_mapping, first);
+                        truncate_inode_pages(mapping, first);
        }
        return ret;
 }
@@ -77,17 +64,16 @@ xfs_flush_pages(
        uint64_t        flags,
        int             fiopt)
 {
-        bhv_vnode_t     *vp = XFS_ITOV(ip);
+        struct address_space *mapping = ip->i_vnode->i_mapping;
-        struct inode    *inode = vn_to_inode(vp);
        int             ret = 0;
        int             ret2;
-        if (VN_DIRTY(vp)) {
+        if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                xfs_iflags_clear(ip, XFS_ITRUNCATED);
-                ret = filemap_fdatawrite(inode->i_mapping);
+                ret = filemap_fdatawrite(mapping);
                if (flags & XFS_B_ASYNC)
                        return ret;
-                ret2 = filemap_fdatawait(inode->i_mapping);
+                ret2 = filemap_fdatawait(mapping);
                if (!ret)
                        ret = ret2;
        }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f34bd010eb51..bf7759793856 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -651,314 +651,6 @@ xfs_attrmulti_by_handle(
        return -error;
 }
-/* prototypes for a few of the stack-hungry cases that have
- * their own functions.  Functions are defined after their use
- * so gcc doesn't get fancy and inline them with -03 */
-STATIC int
-xfs_ioc_space(
-        struct xfs_inode        *ip,
-        struct inode            *inode,
-        struct file             *filp,
-        int                     flags,
-        unsigned int            cmd,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_bulkstat(
-        xfs_mount_t             *mp,
-        unsigned int            cmd,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_fsgeometry_v1(
-        xfs_mount_t             *mp,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_fsgeometry(
-        xfs_mount_t             *mp,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_xattr(
-        xfs_inode_t             *ip,
-        struct file             *filp,
-        unsigned int            cmd,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_fsgetxattr(
-        xfs_inode_t             *ip,
-        int                     attr,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_getbmap(
-        struct xfs_inode        *ip,
-        int                     flags,
-        unsigned int            cmd,
-        void                    __user *arg);
-STATIC int
-xfs_ioc_getbmapx(
-        struct xfs_inode        *ip,
-        void                    __user *arg);
-int
-xfs_ioctl(
-        xfs_inode_t             *ip,
-        struct file             *filp,
-        int                     ioflags,
-        unsigned int            cmd,
-        void                    __user *arg)
-{
-        struct inode            *inode = filp->f_path.dentry->d_inode;
-        xfs_mount_t             *mp = ip->i_mount;
-        int                     error;
-        xfs_itrace_entry(XFS_I(inode));
-        switch (cmd) {
-        case XFS_IOC_ALLOCSP:
-        case XFS_IOC_FREESP:
-        case XFS_IOC_RESVSP:
-        case XFS_IOC_UNRESVSP:
-        case XFS_IOC_ALLOCSP64:
-        case XFS_IOC_FREESP64:
-        case XFS_IOC_RESVSP64:
-        case XFS_IOC_UNRESVSP64:
-                /*
-                 * Only allow the sys admin to reserve space unless
-                 * unwritten extents are enabled.
-                 */
-                if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
-                    !capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
-        case XFS_IOC_DIOINFO: {
-                struct dioattr  da;
-                xfs_buftarg_t   *target =
-                        XFS_IS_REALTIME_INODE(ip) ?
-                        mp->m_rtdev_targp : mp->m_ddev_targp;
-                da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-                da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-                if (copy_to_user(arg, &da, sizeof(da)))
-                        return -XFS_ERROR(EFAULT);
-                return 0;
-        }
-        case XFS_IOC_FSBULKSTAT_SINGLE:
-        case XFS_IOC_FSBULKSTAT:
-        case XFS_IOC_FSINUMBERS:
-                return xfs_ioc_bulkstat(mp, cmd, arg);
-        case XFS_IOC_FSGEOMETRY_V1:
-                return xfs_ioc_fsgeometry_v1(mp, arg);
-        case XFS_IOC_FSGEOMETRY:
-                return xfs_ioc_fsgeometry(mp, arg);
-        case XFS_IOC_GETVERSION:
-                return put_user(inode->i_generation, (int __user *)arg);
-        case XFS_IOC_FSGETXATTR:
-                return xfs_ioc_fsgetxattr(ip, 0, arg);
-        case XFS_IOC_FSGETXATTRA:
-                return xfs_ioc_fsgetxattr(ip, 1, arg);
-        case XFS_IOC_GETXFLAGS:
-        case XFS_IOC_SETXFLAGS:
-        case XFS_IOC_FSSETXATTR:
-                return xfs_ioc_xattr(ip, filp, cmd, arg);
-        case XFS_IOC_FSSETDM: {
-                struct fsdmidata        dmi;
-                if (copy_from_user(&dmi, arg, sizeof(dmi)))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-                                dmi.fsd_dmstate);
-                return -error;
-        }
-        case XFS_IOC_GETBMAP:
-        case XFS_IOC_GETBMAPA:
-                return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-        case XFS_IOC_GETBMAPX:
-                return xfs_ioc_getbmapx(ip, arg);
-        case XFS_IOC_FD_TO_HANDLE:
-        case XFS_IOC_PATH_TO_HANDLE:
-        case XFS_IOC_PATH_TO_FSHANDLE:
-                return xfs_find_handle(cmd, arg);
-        case XFS_IOC_OPEN_BY_HANDLE:
-                return xfs_open_by_handle(mp, arg, filp, inode);
-        case XFS_IOC_FSSETDM_BY_HANDLE:
-                return xfs_fssetdm_by_handle(mp, arg, inode);
-        case XFS_IOC_READLINK_BY_HANDLE:
-                return xfs_readlink_by_handle(mp, arg, inode);
-        case XFS_IOC_ATTRLIST_BY_HANDLE:
-                return xfs_attrlist_by_handle(mp, arg, inode);
-        case XFS_IOC_ATTRMULTI_BY_HANDLE:
-                return xfs_attrmulti_by_handle(mp, arg, inode);
-        case XFS_IOC_SWAPEXT: {
-                error = xfs_swapext((struct xfs_swapext __user *)arg);
-                return -error;
-        }
-        case XFS_IOC_FSCOUNTS: {
-                xfs_fsop_counts_t out;
-                error = xfs_fs_counts(mp, &out);
-                if (error)
-                        return -error;
-                if (copy_to_user(arg, &out, sizeof(out)))
-                        return -XFS_ERROR(EFAULT);
-                return 0;
-        }
-        case XFS_IOC_SET_RESBLKS: {
-                xfs_fsop_resblks_t inout;
-                __uint64_t         in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (copy_from_user(&inout, arg, sizeof(inout)))
-                        return -XFS_ERROR(EFAULT);
-                /* input parameter is passed in resblks field of structure */
-                in = inout.resblks;
-                error = xfs_reserve_blocks(mp, &in, &inout);
-                if (error)
-                        return -error;
-                if (copy_to_user(arg, &inout, sizeof(inout)))
-                        return -XFS_ERROR(EFAULT);
-                return 0;
-        }
-        case XFS_IOC_GET_RESBLKS: {
-                xfs_fsop_resblks_t out;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                error = xfs_reserve_blocks(mp, NULL, &out);
-                if (error)
-                        return -error;
-                if (copy_to_user(arg, &out, sizeof(out)))
-                        return -XFS_ERROR(EFAULT);
-                return 0;
-        }
-        case XFS_IOC_FSGROWFSDATA: {
-                xfs_growfs_data_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (copy_from_user(&in, arg, sizeof(in)))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_growfs_data(mp, &in);
-                return -error;
-        }
-        case XFS_IOC_FSGROWFSLOG: {
-                xfs_growfs_log_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (copy_from_user(&in, arg, sizeof(in)))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_growfs_log(mp, &in);
-                return -error;
-        }
-        case XFS_IOC_FSGROWFSRT: {
-                xfs_growfs_rt_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (copy_from_user(&in, arg, sizeof(in)))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_growfs_rt(mp, &in);
-                return -error;
-        }
-        case XFS_IOC_FREEZE:
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (inode->i_sb->s_frozen == SB_UNFROZEN)
-                        freeze_bdev(inode->i_sb->s_bdev);
-                return 0;
-        case XFS_IOC_THAW:
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (inode->i_sb->s_frozen != SB_UNFROZEN)
-                        thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
-                return 0;
-        case XFS_IOC_GOINGDOWN: {
-                __uint32_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (get_user(in, (__uint32_t __user *)arg))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_fs_goingdown(mp, in);
-                return -error;
-        }
-        case XFS_IOC_ERROR_INJECTION: {
-                xfs_error_injection_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                if (copy_from_user(&in, arg, sizeof(in)))
-                        return -XFS_ERROR(EFAULT);
-                error = xfs_errortag_add(in.errtag, mp);
-                return -error;
-        }
-        case XFS_IOC_ERROR_CLEARALL:
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                error = xfs_errortag_clearall(mp, 1);
-                return -error;
-        default:
-                return -ENOTTY;
-        }
-}
 STATIC int
 xfs_ioc_space(
        struct xfs_inode        *ip,
@@ -1179,85 +871,85 @@ xfs_ioc_fsgetxattr(
 }
 STATIC int
-xfs_ioc_xattr(
+xfs_ioc_fssetxattr(
        xfs_inode_t             *ip,
        struct file             *filp,
-        unsigned int            cmd,
        void                    __user *arg)
 {
        struct fsxattr          fa;
        struct bhv_vattr        *vattr;
-        int                     error = 0;
+        int                     error;
        int                     attr_flags;
-        unsigned int            flags;
+        if (copy_from_user(&fa, arg, sizeof(fa)))
+                return -EFAULT;
        vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
        if (unlikely(!vattr))
                return -ENOMEM;
-        switch (cmd) {
+        attr_flags = 0;
-        case XFS_IOC_FSSETXATTR: {
+        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-                if (copy_from_user(&fa, arg, sizeof(fa))) {
+                attr_flags |= ATTR_NONBLOCK;
-                        error = -EFAULT;
-                        break;
-                }
-                attr_flags = 0;
+        vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
-                if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+        vattr->va_xflags  = fa.fsx_xflags;
-                        attr_flags |= ATTR_NONBLOCK;
+        vattr->va_extsize = fa.fsx_extsize;
+        vattr->va_projid  = fa.fsx_projid;
-                vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
+        error = -xfs_setattr(ip, vattr, attr_flags, NULL);
-                vattr->va_xflags  = fa.fsx_xflags;
+        if (!error)
-                vattr->va_extsize = fa.fsx_extsize;
+                vn_revalidate(XFS_ITOV(ip));    /* update flags */
-                vattr->va_projid  = fa.fsx_projid;
+        kfree(vattr);
+        return 0;
+}
-                error = xfs_setattr(ip, vattr, attr_flags, NULL);
+STATIC int
-                if (likely(!error))
+xfs_ioc_getxflags(
-                        vn_revalidate(XFS_ITOV(ip));    /* update flags */
+        xfs_inode_t             *ip,
-                error = -error;
+        void                    __user *arg)
-                break;
+{
-        }
+        unsigned int            flags;
-        case XFS_IOC_GETXFLAGS: {
+        flags = xfs_di2lxflags(ip->i_d.di_flags);
-                flags = xfs_di2lxflags(ip->i_d.di_flags);
+        if (copy_to_user(arg, &flags, sizeof(flags)))
-                if (copy_to_user(arg, &flags, sizeof(flags)))
+                return -EFAULT;
-                        error = -EFAULT;
+        return 0;
-                break;
+}
-        }
-        case XFS_IOC_SETXFLAGS: {
+STATIC int
-                if (copy_from_user(&flags, arg, sizeof(flags))) {
+xfs_ioc_setxflags(
-                        error = -EFAULT;
+        xfs_inode_t             *ip,
-                        break;
+        struct file             *filp,
-                }
+        void                    __user *arg)
+{
+        struct bhv_vattr        *vattr;
+        unsigned int            flags;
+        int                     attr_flags;
+        int                     error;
-                if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+        if (copy_from_user(&flags, arg, sizeof(flags)))
-                              FS_NOATIME_FL | FS_NODUMP_FL | \
+                return -EFAULT;
-                              FS_SYNC_FL)) {
-                        error = -EOPNOTSUPP;
-                        break;
-                }
-                attr_flags = 0;
+        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-                if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+                      FS_NOATIME_FL | FS_NODUMP_FL | \
-                        attr_flags |= ATTR_NONBLOCK;
+                      FS_SYNC_FL))
+                return -EOPNOTSUPP;
-                vattr->va_mask = XFS_AT_XFLAGS;
+        vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-                vattr->va_xflags = xfs_merge_ioc_xflags(flags,
+        if (unlikely(!vattr))
-                                                        xfs_ip2xflags(ip));
+                return -ENOMEM;
-                error = xfs_setattr(ip, vattr, attr_flags, NULL);
+        attr_flags = 0;
-                if (likely(!error))
+        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-                        vn_revalidate(XFS_ITOV(ip));    /* update flags */
+                attr_flags |= ATTR_NONBLOCK;
-                error = -error;
-                break;
-        }
-        default:
+        vattr->va_mask = XFS_AT_XFLAGS;
-                error = -ENOTTY;
+        vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-                break;
-        }
+        error = -xfs_setattr(ip, vattr, attr_flags, NULL);
+        if (likely(!error))
+                vn_revalidate(XFS_ITOV(ip));    /* update flags */
        kfree(vattr);
        return error;
 }
@@ -1332,3 +1024,259 @@ xfs_ioc_getbmapx(
        return 0;
 }
+int
+xfs_ioctl(
+        xfs_inode_t             *ip,
+        struct file             *filp,
+        int                     ioflags,
+        unsigned int            cmd,
+        void                    __user *arg)
+{
+        struct inode            *inode = filp->f_path.dentry->d_inode;
+        xfs_mount_t             *mp = ip->i_mount;
+        int                     error;
+        xfs_itrace_entry(XFS_I(inode));
+        switch (cmd) {
+        case XFS_IOC_ALLOCSP:
+        case XFS_IOC_FREESP:
+        case XFS_IOC_RESVSP:
+        case XFS_IOC_UNRESVSP:
+        case XFS_IOC_ALLOCSP64:
+        case XFS_IOC_FREESP64:
+        case XFS_IOC_RESVSP64:
+        case XFS_IOC_UNRESVSP64:
+                /*
+                 * Only allow the sys admin to reserve space unless
+                 * unwritten extents are enabled.
+                 */
+                if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
+                    !capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
+        case XFS_IOC_DIOINFO: {
+                struct dioattr  da;
+                xfs_buftarg_t   *target =
+                        XFS_IS_REALTIME_INODE(ip) ?
+                        mp->m_rtdev_targp : mp->m_ddev_targp;
+                da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+                da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+                if (copy_to_user(arg, &da, sizeof(da)))
+                        return -XFS_ERROR(EFAULT);
+                return 0;
+        }
+        case XFS_IOC_FSBULKSTAT_SINGLE:
+        case XFS_IOC_FSBULKSTAT:
+        case XFS_IOC_FSINUMBERS:
+                return xfs_ioc_bulkstat(mp, cmd, arg);
+        case XFS_IOC_FSGEOMETRY_V1:
+                return xfs_ioc_fsgeometry_v1(mp, arg);
+        case XFS_IOC_FSGEOMETRY:
+                return xfs_ioc_fsgeometry(mp, arg);
+        case XFS_IOC_GETVERSION:
+                return put_user(inode->i_generation, (int __user *)arg);
+        case XFS_IOC_FSGETXATTR:
+                return xfs_ioc_fsgetxattr(ip, 0, arg);
+        case XFS_IOC_FSGETXATTRA:
+                return xfs_ioc_fsgetxattr(ip, 1, arg);
+        case XFS_IOC_FSSETXATTR:
+                return xfs_ioc_fssetxattr(ip, filp, arg);
+        case XFS_IOC_GETXFLAGS:
+                return xfs_ioc_getxflags(ip, arg);
+        case XFS_IOC_SETXFLAGS:
+                return xfs_ioc_setxflags(ip, filp, arg);
+        case XFS_IOC_FSSETDM: {
+                struct fsdmidata        dmi;
+                if (copy_from_user(&dmi, arg, sizeof(dmi)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+                                dmi.fsd_dmstate);
+                return -error;
+        }
+        case XFS_IOC_GETBMAP:
+        case XFS_IOC_GETBMAPA:
+                return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+        case XFS_IOC_GETBMAPX:
+                return xfs_ioc_getbmapx(ip, arg);
+        case XFS_IOC_FD_TO_HANDLE:
+        case XFS_IOC_PATH_TO_HANDLE:
+        case XFS_IOC_PATH_TO_FSHANDLE:
+                return xfs_find_handle(cmd, arg);
+        case XFS_IOC_OPEN_BY_HANDLE:
+                return xfs_open_by_handle(mp, arg, filp, inode);
+        case XFS_IOC_FSSETDM_BY_HANDLE:
+                return xfs_fssetdm_by_handle(mp, arg, inode);
+        case XFS_IOC_READLINK_BY_HANDLE:
+                return xfs_readlink_by_handle(mp, arg, inode);
+        case XFS_IOC_ATTRLIST_BY_HANDLE:
+                return xfs_attrlist_by_handle(mp, arg, inode);
+        case XFS_IOC_ATTRMULTI_BY_HANDLE:
+                return xfs_attrmulti_by_handle(mp, arg, inode);
+        case XFS_IOC_SWAPEXT: {
+                error = xfs_swapext((struct xfs_swapext __user *)arg);
+                return -error;
+        }
+        case XFS_IOC_FSCOUNTS: {
+                xfs_fsop_counts_t out;
+                error = xfs_fs_counts(mp, &out);
+                if (error)
+                        return -error;
+                if (copy_to_user(arg, &out, sizeof(out)))
+                        return -XFS_ERROR(EFAULT);
+                return 0;
+        }
+        case XFS_IOC_SET_RESBLKS: {
+                xfs_fsop_resblks_t inout;
+                __uint64_t         in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&inout, arg, sizeof(inout)))
+                        return -XFS_ERROR(EFAULT);
+                /* input parameter is passed in resblks field of structure */
+                in = inout.resblks;
+                error = xfs_reserve_blocks(mp, &in, &inout);
+                if (error)
+                        return -error;
+                if (copy_to_user(arg, &inout, sizeof(inout)))
+                        return -XFS_ERROR(EFAULT);
+                return 0;
+        }
+        case XFS_IOC_GET_RESBLKS: {
+                xfs_fsop_resblks_t out;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                error = xfs_reserve_blocks(mp, NULL, &out);
+                if (error)
+                        return -error;
+                if (copy_to_user(arg, &out, sizeof(out)))
+                        return -XFS_ERROR(EFAULT);
+                return 0;
+        }
+        case XFS_IOC_FSGROWFSDATA: {
+                xfs_growfs_data_t in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&in, arg, sizeof(in)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_growfs_data(mp, &in);
+                return -error;
+        }
+        case XFS_IOC_FSGROWFSLOG: {
+                xfs_growfs_log_t in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&in, arg, sizeof(in)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_growfs_log(mp, &in);
+                return -error;
+        }
+        case XFS_IOC_FSGROWFSRT: {
+                xfs_growfs_rt_t in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&in, arg, sizeof(in)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_growfs_rt(mp, &in);
+                return -error;
+        }
+        case XFS_IOC_FREEZE:
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (inode->i_sb->s_frozen == SB_UNFROZEN)
+                        freeze_bdev(inode->i_sb->s_bdev);
+                return 0;
+        case XFS_IOC_THAW:
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (inode->i_sb->s_frozen != SB_UNFROZEN)
+                        thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
+                return 0;
+        case XFS_IOC_GOINGDOWN: {
+                __uint32_t in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (get_user(in, (__uint32_t __user *)arg))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_fs_goingdown(mp, in);
+                return -error;
+        }
+        case XFS_IOC_ERROR_INJECTION: {
+                xfs_error_injection_t in;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&in, arg, sizeof(in)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_errortag_add(in.errtag, mp);
+                return -error;
+        }
+        case XFS_IOC_ERROR_CLEARALL:
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                error = xfs_errortag_clearall(mp, 1);
+                return -error;
+        default:
+                return -ENOTTY;
+        }
+}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cc4abd3daa49..0c958cf77758 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,12 +62,11 @@ void
 xfs_synchronize_atime(
        xfs_inode_t     *ip)
 {
-        bhv_vnode_t     *vp;
+        struct inode    *inode = ip->i_vnode;
-        vp = XFS_ITOV_NULL(ip);
+        if (inode) {
-        if (vp) {
+                ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-                ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec;
+                ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-                ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec;
        }
 }
@@ -80,11 +79,10 @@ void
 xfs_mark_inode_dirty_sync(
        xfs_inode_t     *ip)
 {
-        bhv_vnode_t     *vp;
+        struct inode    *inode = ip->i_vnode;
-        vp = XFS_ITOV_NULL(ip);
+        if (inode)
-        if (vp)
+                mark_inode_dirty_sync(inode);
-                mark_inode_dirty_sync(vn_to_inode(vp));
 }
 /*
@@ -215,66 +213,62 @@ xfs_validate_fields(
 */
 STATIC int
 xfs_init_security(
-        bhv_vnode_t     *vp,
+        struct inode    *inode,
        struct inode    *dir)
 {
-        struct inode    *ip = vn_to_inode(vp);
+        struct xfs_inode *ip = XFS_I(inode);
        size_t          length;
        void            *value;
        char            *name;
        int             error;
-        error = security_inode_init_security(ip, dir, &name, &value, &length);
+        error = security_inode_init_security(inode, dir, &name,
+                                             &value, &length);
        if (error) {
                if (error == -EOPNOTSUPP)
                        return 0;
                return -error;
        }
-        error = xfs_attr_set(XFS_I(ip), name, value,
+        error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-                        length, ATTR_SECURE);
        if (!error)
-                xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED);
+                xfs_iflags_set(ip, XFS_IMODIFIED);
        kfree(name);
        kfree(value);
        return error;
 }
-/*
+static void
- * Determine whether a process has a valid fs_struct (kernel daemons
+xfs_dentry_to_name(
- * like knfsd don't have an fs_struct).
+        struct xfs_name *namep,
- *
+        struct dentry   *dentry)
- * XXX(hch):  nfsd is broken, better fix it instead.
- */
-STATIC_INLINE int
-xfs_has_fs_struct(struct task_struct *task)
 {
-        return (task->fs != init_task.fs);
+        namep->name = dentry->d_name.name;
+        namep->len = dentry->d_name.len;
 }
 STATIC void
 xfs_cleanup_inode(
        struct inode    *dir,
-        bhv_vnode_t     *vp,
+        struct inode    *inode,
        struct dentry   *dentry,
        int             mode)
 {
-        struct dentry   teardown = {};
+        struct xfs_name teardown;
        /* Oh, the horror.
         * If we can't add the ACL or we fail in
         * xfs_init_security we must back out.
         * ENOSPC can hit here, among other things.
         */
-        teardown.d_inode = vn_to_inode(vp);
+        xfs_dentry_to_name(&teardown, dentry);
-        teardown.d_name = dentry->d_name;
        if (S_ISDIR(mode))
-                xfs_rmdir(XFS_I(dir), &teardown);
+                xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
        else
-                xfs_remove(XFS_I(dir), &teardown);
+                xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-        VN_RELE(vp);
+        iput(inode);
 }
 STATIC int
@@ -284,9 +278,10 @@ xfs_vn_mknod(
        int             mode,
        dev_t           rdev)
 {
-        struct inode    *ip;
+        struct inode    *inode;
-        bhv_vnode_t     *vp = NULL, *dvp = vn_from_inode(dir);
+        struct xfs_inode *ip = NULL;
        xfs_acl_t       *default_acl = NULL;
+        struct xfs_name name;
        attrexists_t    test_default_acl = _ACL_DEFAULT_EXISTS;
        int             error;
@@ -297,59 +292,67 @@ xfs_vn_mknod(
        if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
                return -EINVAL;
-        if (unlikely(test_default_acl && test_default_acl(dvp))) {
+        if (test_default_acl && test_default_acl(dir)) {
                if (!_ACL_ALLOC(default_acl)) {
                        return -ENOMEM;
                }
-                if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
+                if (!_ACL_GET_DEFAULT(dir, default_acl)) {
                        _ACL_FREE(default_acl);
                        default_acl = NULL;
                }
        }
-        if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
+        xfs_dentry_to_name(&name, dentry);
+        if (IS_POSIXACL(dir) && !default_acl)
                mode &= ~current->fs->umask;
        switch (mode & S_IFMT) {
-        case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+        case S_IFCHR:
+        case S_IFBLK:
+        case S_IFIFO:
+        case S_IFSOCK:
                rdev = sysv_encode_dev(rdev);
        case S_IFREG:
-                error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL);
+                error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
                break;
        case S_IFDIR:
-                error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL);
+                error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL);
                break;
        default:
                error = EINVAL;
                break;
        }
-        if (unlikely(!error)) {
+        if (unlikely(error))
-                error = xfs_init_security(vp, dir);
+                goto out_free_acl;
-                if (error)
-                        xfs_cleanup_inode(dir, vp, dentry, mode);
-        }
-        if (unlikely(default_acl)) {
+        inode = ip->i_vnode;
-                if (!error) {
-                        error = _ACL_INHERIT(vp, mode, default_acl);
+        error = xfs_init_security(inode, dir);
-                        if (!error)
+        if (unlikely(error))
-                                xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED);
+                goto out_cleanup_inode;
-                        else
-                                xfs_cleanup_inode(dir, vp, dentry, mode);
+        if (default_acl) {
-                }
+                error = _ACL_INHERIT(inode, mode, default_acl);
+                if (unlikely(error))
+                        goto out_cleanup_inode;
+                xfs_iflags_set(ip, XFS_IMODIFIED);
                _ACL_FREE(default_acl);
        }
-        if (likely(!error)) {
-                ASSERT(vp);
-                ip = vn_to_inode(vp);
-                if (S_ISDIR(mode))
+        if (S_ISDIR(mode))
-                        xfs_validate_fields(ip);
+                xfs_validate_fields(inode);
-                d_instantiate(dentry, ip);
+        d_instantiate(dentry, inode);
-                xfs_validate_fields(dir);
+        xfs_validate_fields(dir);
-        }
+        return -error;
+ out_cleanup_inode:
+        xfs_cleanup_inode(dir, inode, dentry, mode);
+ out_free_acl:
+        if (default_acl)
+                _ACL_FREE(default_acl);
        return -error;
 }
@@ -378,13 +381,15 @@ xfs_vn_lookup(
        struct dentry   *dentry,
        struct nameidata *nd)
 {
-        bhv_vnode_t     *cvp;
+        struct xfs_inode *cip;
+        struct xfs_name name;
        int             error;
        if (dentry->d_name.len >= MAXNAMELEN)
                return ERR_PTR(-ENAMETOOLONG);
-        error = xfs_lookup(XFS_I(dir), dentry, &cvp);
+        xfs_dentry_to_name(&name, dentry);
+        error = xfs_lookup(XFS_I(dir), &name, &cip);
        if (unlikely(error)) {
                if (unlikely(error != ENOENT))
                        return ERR_PTR(-error);
@@ -392,7 +397,7 @@ xfs_vn_lookup(
                return NULL;
        }
-        return d_splice_alias(vn_to_inode(cvp), dentry);
+        return d_splice_alias(cip->i_vnode, dentry);
 }
 STATIC int
@@ -401,23 +406,24 @@ xfs_vn_link(
        struct inode    *dir,
        struct dentry   *dentry)
 {
-        struct inode    *ip;    /* inode of guy being linked to */
+        struct inode    *inode; /* inode of guy being linked to */
-        bhv_vnode_t     *vp;    /* vp of name being linked */
+        struct xfs_name name;
        int             error;
-        ip = old_dentry->d_inode;       /* inode being linked to */
+        inode = old_dentry->d_inode;
-        vp = vn_from_inode(ip);
+        xfs_dentry_to_name(&name, dentry);
-        VN_HOLD(vp);
+        igrab(inode);
-        error = xfs_link(XFS_I(dir), vp, dentry);
+        error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
        if (unlikely(error)) {
-                VN_RELE(vp);
+                iput(inode);
-        } else {
+                return -error;
-                xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
-                xfs_validate_fields(ip);
-                d_instantiate(dentry, ip);
        }
-        return -error;
+        xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
+        xfs_validate_fields(inode);
+        d_instantiate(dentry, inode);
+        return 0;
 }
 STATIC int
@@ -426,11 +432,13 @@ xfs_vn_unlink(
        struct dentry   *dentry)
 {
        struct inode    *inode;
+        struct xfs_name name;
        int             error;
        inode = dentry->d_inode;
+        xfs_dentry_to_name(&name, dentry);
-        error = xfs_remove(XFS_I(dir), dentry);
+        error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
        if (likely(!error)) {
                xfs_validate_fields(dir);       /* size needs update */
                xfs_validate_fields(inode);
@@ -444,29 +452,34 @@ xfs_vn_symlink(
        struct dentry   *dentry,
        const char      *symname)
 {
-        struct inode    *ip;
+        struct inode    *inode;
-        bhv_vnode_t     *cvp;   /* used to lookup symlink to put in dentry */
+        struct xfs_inode *cip = NULL;
+        struct xfs_name name;
        int             error;
        mode_t          mode;
-        cvp = NULL;
        mode = S_IFLNK |
                (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
+        xfs_dentry_to_name(&name, dentry);
-        error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode,
+        error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
-                            &cvp, NULL);
+        if (unlikely(error))
-        if (likely(!error && cvp)) {
+                goto out;
-                error = xfs_init_security(cvp, dir);
-                if (likely(!error)) {
+        inode = cip->i_vnode;
-                        ip = vn_to_inode(cvp);
-                        d_instantiate(dentry, ip);
+        error = xfs_init_security(inode, dir);
-                        xfs_validate_fields(dir);
+        if (unlikely(error))
-                        xfs_validate_fields(ip);
+                goto out_cleanup_inode;
-                } else {
-                        xfs_cleanup_inode(dir, cvp, dentry, 0);
+        d_instantiate(dentry, inode);
-                }
+        xfs_validate_fields(dir);
-        }
+        xfs_validate_fields(inode);
+        return 0;
+ out_cleanup_inode:
+        xfs_cleanup_inode(dir, inode, dentry, 0);
+ out:
        return -error;
 }
@@ -476,9 +489,12 @@ xfs_vn_rmdir(
        struct dentry   *dentry)
 {
        struct inode    *inode = dentry->d_inode;
+        struct xfs_name name;
        int             error;
-        error = xfs_rmdir(XFS_I(dir), dentry);
+        xfs_dentry_to_name(&name, dentry);
+        error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
        if (likely(!error)) {
                xfs_validate_fields(inode);
                xfs_validate_fields(dir);
@@ -494,12 +510,15 @@ xfs_vn_rename(
        struct dentry   *ndentry)
 {
        struct inode    *new_inode = ndentry->d_inode;
-        bhv_vnode_t     *tvp;   /* target directory */
+        struct xfs_name oname;
+        struct xfs_name nname;
        int             error;
-        tvp = vn_from_inode(ndir);
+        xfs_dentry_to_name(&oname, odentry);
+        xfs_dentry_to_name(&nname, ndentry);
-        error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry);
+        error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+                                                        XFS_I(ndir), &nname);
        if (likely(!error)) {
                if (new_inode)
                        xfs_validate_fields(new_inode);
@@ -700,11 +719,19 @@ xfs_vn_setattr(
        return -error;
 }
+/*
+ * block_truncate_page can return an error, but we can't propagate it
+ * at all here. Leave a complaint + stack trace in the syslog because
+ * this could be bad. If it is bad, we need to propagate the error further.
+ */
 STATIC void
 xfs_vn_truncate(
        struct inode    *inode)
 {
-        block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks);
+        int     error;
+        error = block_truncate_page(inode->i_mapping, inode->i_size,
+                                                        xfs_get_blocks);
+        WARN_ON(error);
 }
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 3ca39c4e5d2a..e5143323e71f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
 /*
 * Feature macros (disable/enable)
 */
-#undef  HAVE_REFCACHE   /* reference cache not needed for NFS in 2.6 */
 #define HAVE_SPLICE     /* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB  /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 166353388490..21c0dbc74093 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -176,7 +176,6 @@ xfs_read(
 {
        struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
-        bhv_vnode_t             *vp = XFS_ITOV(ip);
        xfs_mount_t             *mp = ip->i_mount;
        size_t                  size = 0;
        ssize_t                 ret = 0;
@@ -228,11 +227,11 @@ xfs_read(
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-                bhv_vrwlock_t locktype = VRWLOCK_READ;
                int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+                int iolock = XFS_IOLOCK_SHARED;
-                ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size,
+                ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
-                                        dmflags, &locktype);
+                                        dmflags, &iolock);
                if (ret) {
                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
                        if (unlikely(ioflags & IO_ISDIRECT))
@@ -242,7 +241,7 @@ xfs_read(
        }
        if (unlikely(ioflags & IO_ISDIRECT)) {
-                if (VN_CACHED(vp))
+                if (inode->i_mapping->nrpages)
                        ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
                                                    -1, FI_REMAPF_LOCKED);
                mutex_unlock(&inode->i_mutex);
@@ -276,7 +275,6 @@ xfs_splice_read(
        int                     flags,
        int                     ioflags)
 {
-        bhv_vnode_t             *vp = XFS_ITOV(ip);
        xfs_mount_t             *mp = ip->i_mount;
        ssize_t                 ret;
@@ -287,11 +285,11 @@ xfs_splice_read(
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-                bhv_vrwlock_t locktype = VRWLOCK_READ;
+                int iolock = XFS_IOLOCK_SHARED;
                int error;
-                error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count,
+                error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
-                                        FILP_DELAY_FLAG(infilp), &locktype);
+                                        FILP_DELAY_FLAG(infilp), &iolock);
                if (error) {
                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
                        return -error;
@@ -317,7 +315,6 @@ xfs_splice_write(
        int                     flags,
        int                     ioflags)
 {
-        bhv_vnode_t             *vp = XFS_ITOV(ip);
        xfs_mount_t             *mp = ip->i_mount;
        ssize_t                 ret;
        struct inode            *inode = outfilp->f_mapping->host;
@@ -330,11 +327,11 @@ xfs_splice_write(
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
        if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
-                bhv_vrwlock_t locktype = VRWLOCK_WRITE;
+                int iolock = XFS_IOLOCK_EXCL;
                int error;
-                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count,
+                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
-                                        FILP_DELAY_FLAG(outfilp), &locktype);
+                                        FILP_DELAY_FLAG(outfilp), &iolock);
                if (error) {
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return -error;
@@ -573,14 +570,12 @@ xfs_write(
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        struct inode            *inode = mapping->host;
-        bhv_vnode_t             *vp = XFS_ITOV(xip);
        unsigned long           segs = nsegs;
        xfs_mount_t             *mp;
        ssize_t                 ret = 0, error = 0;
        xfs_fsize_t             isize, new_size;
        int                     iolock;
        int                     eventsent = 0;
-        bhv_vrwlock_t           locktype;
        size_t                  ocount = 0, count;
        loff_t                  pos;
        int                     need_i_mutex;
@@ -607,11 +602,9 @@ xfs_write(
 relock:
        if (ioflags & IO_ISDIRECT) {
                iolock = XFS_IOLOCK_SHARED;
-                locktype = VRWLOCK_WRITE_DIRECT;
                need_i_mutex = 0;
        } else {
                iolock = XFS_IOLOCK_EXCL;
-                locktype = VRWLOCK_WRITE;
                need_i_mutex = 1;
                mutex_lock(&inode->i_mutex);
        }
@@ -634,9 +627,8 @@ start:
                        dmflags |= DM_FLAGS_IMUX;
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
-                error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
+                error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
-                                      pos, count,
+                                      pos, count, dmflags, &iolock);
-                                      dmflags, &locktype);
                if (error) {
                        goto out_unlock_internal;
                }
@@ -664,10 +656,9 @@ start:
                        return XFS_ERROR(-EINVAL);
                }
-                if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
+                if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
                        xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
                        iolock = XFS_IOLOCK_EXCL;
-                        locktype = VRWLOCK_WRITE;
                        need_i_mutex = 1;
                        mutex_lock(&inode->i_mutex);
                        xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
@@ -727,7 +718,7 @@ retry:
        current->backing_dev_info = mapping->backing_dev_info;
        if ((ioflags & IO_ISDIRECT)) {
-                if (VN_CACHED(vp)) {
+                if (mapping->nrpages) {
                        WARN_ON(need_i_mutex == 0);
                        xfs_inval_cached_trace(xip, pos, -1,
                                        (pos & PAGE_CACHE_MASK), -1);
@@ -744,7 +735,6 @@ retry:
                        mutex_unlock(&inode->i_mutex);
                        iolock = XFS_IOLOCK_SHARED;
-                        locktype = VRWLOCK_WRITE_DIRECT;
                        need_i_mutex = 0;
                }
@@ -781,15 +771,15 @@ retry:
        if (ret == -ENOSPC &&
            DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
-                xfs_rwunlock(xip, locktype);
+                xfs_iunlock(xip, iolock);
                if (need_i_mutex)
                        mutex_unlock(&inode->i_mutex);
-                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
+                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
-                                DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+                                DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
                                0, 0, 0); /* Delay flag intentionally  unused */
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
-                xfs_rwlock(xip, locktype);
+                xfs_ilock(xip, iolock);
                if (error)
                        goto out_unlock_internal;
                pos = xip->i_size;
@@ -817,7 +807,8 @@ retry:
        /* Handle various SYNC-type writes */
        if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
                int error2;
-                xfs_rwunlock(xip, locktype);
+                xfs_iunlock(xip, iolock);
                if (need_i_mutex)
                        mutex_unlock(&inode->i_mutex);
                error2 = sync_page_range(inode, mapping, pos, ret);
@@ -825,7 +816,7 @@ retry:
                        error = error2;
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
-                xfs_rwlock(xip, locktype);
+                xfs_ilock(xip, iolock);
                error2 = xfs_write_sync_logforce(mp, xip);
                if (!error)
                        error = error2;
@@ -846,7 +837,7 @@ retry:
                        xip->i_d.di_size = xip->i_size;
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
        }
-        xfs_rwunlock(xip, locktype);
+        xfs_iunlock(xip, iolock);
 out_unlock_mutex:
        if (need_i_mutex)
                mutex_unlock(&inode->i_mutex);
@@ -884,28 +875,23 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
 }
 /*
- * Wrapper around bdstrat so that we can stop data
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
- * from going to disk in case we are shutting down the filesystem.
+ * we are shutting down the filesystem.  Typically user data goes thru this
- * Typically user data goes thru this path; one of the exceptions
+ * path; one of the exceptions is the superblock.
- * is the superblock.
 */
-int
+void
 xfsbdstrat(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp)
 {
        ASSERT(mp);
        if (!XFS_FORCED_SHUTDOWN(mp)) {
-                /* Grio redirection would go here
-                 * if (XFS_BUF_IS_GRIO(bp)) {
-                 */
                xfs_buf_iorequest(bp);
-                return 0;
+                return;
        }
        xfs_buftrace("XFSBDSTRAT IOERROR", bp);
-        return (xfs_bioerror_relse(bp));
+        xfs_bioerror_relse(bp);
 }
 /*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e200253139cf..e1d498b4ba7a 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -68,7 +68,8 @@ extern void xfs_inval_cached_trace(struct xfs_inode *,
 #define xfs_inval_cached_trace(ip, offset, len, first, last)
 #endif
-extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+/* errors from xfsbdstrat() must be extracted from the buffer */
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb(struct xfs_buf *);
 extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 8ba7a2fa6c1d..afd0b0d5fdb2 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -144,8 +144,8 @@ extern void xfs_cleanup_procfs(void);
 # define XFS_STATS_DEC(count)
 # define XFS_STATS_ADD(count, inc)
-static __inline void xfs_init_procfs(void) { };
+static inline void xfs_init_procfs(void) { };
-static __inline void xfs_cleanup_procfs(void) { };
+static inline void xfs_cleanup_procfs(void) { };
 #endif  /* !CONFIG_PROC_FS */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 8831d9518790..865eb708aa95 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
        struct inode            *inode,
        int                     sync)
 {
-        int                     error = 0, flags = FLUSH_INODE;
+        int                     error = 0;
+        int                     flags = 0;
        xfs_itrace_entry(XFS_I(inode));
        if (sync) {
@@ -934,7 +935,7 @@ xfs_fs_clear_inode(
                xfs_inactive(ip);
                xfs_iflags_clear(ip, XFS_IMODIFIED);
                if (xfs_reclaim(ip))
-                        panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, inode);
+                        panic("%s: cannot reclaim 0x%p\n", __func__, inode);
        }
        ASSERT(XFS_I(inode) == NULL);
@@ -1027,8 +1028,7 @@ xfs_sync_worker(
        int             error;
        if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-                error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR |
+                error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
-                                     SYNC_REFCACHE | SYNC_SUPER);
        mp->m_sync_seq++;
        wake_up(&mp->m_wait_single_sync_task);
 }
@@ -1306,7 +1306,7 @@ xfs_fs_fill_super(
        void                    *data,
        int                     silent)
 {
-        struct inode            *rootvp;
+        struct inode            *root;
        struct xfs_mount        *mp = NULL;
        struct xfs_mount_args   *args = xfs_args_allocate(sb, silent);
        int                     error;
@@ -1344,19 +1344,18 @@ xfs_fs_fill_super(
        sb->s_time_gran = 1;
        set_posix_acl_flag(sb);
-        rootvp = igrab(mp->m_rootip->i_vnode);
+        root = igrab(mp->m_rootip->i_vnode);
-        if (!rootvp) {
+        if (!root) {
                error = ENOENT;
                goto fail_unmount;
        }
+        if (is_bad_inode(root)) {
-        sb->s_root = d_alloc_root(vn_to_inode(rootvp));
+                error = EINVAL;
-        if (!sb->s_root) {
-                error = ENOMEM;
                goto fail_vnrele;
        }
-        if (is_bad_inode(sb->s_root->d_inode)) {
+        sb->s_root = d_alloc_root(root);
-                error = EINVAL;
+        if (!sb->s_root) {
+                error = ENOMEM;
                goto fail_vnrele;
        }
@@ -1378,7 +1377,7 @@ fail_vnrele:
                dput(sb->s_root);
                sb->s_root = NULL;
        } else {
-                VN_RELE(rootvp);
+                iput(root);
        }
 fail_unmount:
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efcf45b14ab..3efb7c6d3303 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -50,13 +50,7 @@ extern void xfs_qm_exit(void);
 # define set_posix_acl_flag(sb) do { } while (0)
 #endif
-#ifdef CONFIG_XFS_SECURITY
+#define XFS_SECURITY_STRING     "security attributes, "
-# define XFS_SECURITY_STRING    "security attributes, "
-# define ENOSECURITY            0
-#else
-# define XFS_SECURITY_STRING
-# define ENOSECURITY            EOPNOTSUPP
-#endif
 #ifdef CONFIG_XFS_RT
 # define XFS_REALTIME_STRING    "realtime, "
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 4da03a4e3520..7e60c7776b1c 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work {
 #define SYNC_REFCACHE           0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT            0x0080  /* remount readonly, no dummy LRs */
 #define SYNC_IOWAIT             0x0100  /* wait for all I/O to complete */
-#define SYNC_SUPER              0x0200  /* flush superblock to disk */
 /*
 * When remounting a filesystem read-only or freezing the filesystem,
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b5ea418693b1..8b4d63ce8694 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -23,8 +23,6 @@ struct bhv_vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
-typedef struct dentry   bhv_vname_t;
-typedef __u64           bhv_vnumber_t;
 typedef struct inode    bhv_vnode_t;
 #define VN_ISLNK(vp)    S_ISLNK((vp)->i_mode)
@@ -46,18 +44,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
 }
 /*
- * Values for the vop_rwlock/rwunlock flags parameter.
- */
-typedef enum bhv_vrwlock {
-        VRWLOCK_NONE,
-        VRWLOCK_READ,
-        VRWLOCK_WRITE,
-        VRWLOCK_WRITE_DIRECT,
-        VRWLOCK_TRY_READ,
-        VRWLOCK_TRY_WRITE
-} bhv_vrwlock_t;
-/*
 * Return values for xfs_inactive.  A return value of
 * VN_INACTIVE_NOCACHE implies that the file system behavior
 * has disassociated its state and bhv_desc_t from the vnode.
@@ -73,12 +59,9 @@ typedef enum bhv_vrwlock {
 #define IO_INVIS        0x00020         /* don't update inode timestamps */
 /*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
 */
 #define FLUSH_SYNC              1       /* wait for flush to complete   */
-#define FLUSH_INODE             2       /* flush the inode itself       */
-#define FLUSH_LOG               4       /* force the last log entry for
-                                         * this inode out to disk       */
 /*
 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
@@ -226,13 +209,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
 }
 /*
- * Vname handling macros.
- */
-#define VNAME(dentry)           ((char *) (dentry)->d_name.name)
-#define VNAMELEN(dentry)        ((dentry)->d_name.len)
-#define VNAME_TO_VNODE(dentry)  (vn_from_inode((dentry)->d_inode))
-/*
 * Dealing with bad inodes
 */
 static inline int VN_BAD(bhv_vnode_t *vp)
@@ -303,9 +279,9 @@ extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
 extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
 extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
 #define xfs_itrace_entry(ip)    \
-        _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address)
+        _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
 #define xfs_itrace_exit(ip)     \
-        _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address)
+        _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
 #define xfs_itrace_exit_tag(ip, tag)    \
        _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
 #define xfs_itrace_ref(ip)      \
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 665babcca6a6..631ebb31b295 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1291,7 +1291,7 @@ xfs_qm_dqflush(
        if (flags & XFS_QMOPT_DELWRI) {
                xfs_bdwrite(mp, bp);
        } else if (flags & XFS_QMOPT_ASYNC) {
-                xfs_bawrite(mp, bp);
+                error = xfs_bawrite(mp, bp);
        } else {
                error = xfs_bwrite(mp, bp);
        }
@@ -1439,9 +1439,7 @@ xfs_qm_dqpurge(
        uint            flags)
 {
        xfs_dqhash_t    *thishash;
-        xfs_mount_t     *mp;
+        xfs_mount_t     *mp = dqp->q_mount;
-        mp = dqp->q_mount;
        ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
        ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
@@ -1485,6 +1483,7 @@ xfs_qm_dqpurge(
         * we're unmounting, we do care, so we flush it and wait.
         */
        if (XFS_DQ_IS_DIRTY(dqp)) {
+                int     error;
                xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
                /* dqflush unlocks dqflock */
                /*
@@ -1495,7 +1494,10 @@ xfs_qm_dqpurge(
                 * We don't care about getting disk errors here. We need
                 * to purge this dquot anyway, so we go ahead regardless.
                 */
-                (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
+                error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
+                if (error)
+                        xfs_fs_cmn_err(CE_WARN, mp,
+                                "xfs_qm_dqpurge: dquot %p flush failed", dqp);
                xfs_dqflock(dqp);
        }
        ASSERT(dqp->q_pincount == 0);
@@ -1580,12 +1582,18 @@ xfs_qm_dqflock_pushbuf_wait(
                    XFS_INCORE_TRYLOCK);
        if (bp != NULL) {
                if (XFS_BUF_ISDELAYWRITE(bp)) {
+                        int     error;
                        if (XFS_BUF_ISPINNED(bp)) {
                                xfs_log_force(dqp->q_mount,
                                              (xfs_lsn_t)0,
                                              XFS_LOG_FORCE);
                        }
-                        xfs_bawrite(dqp->q_mount, bp);
+                        error = xfs_bawrite(dqp->q_mount, bp);
+                        if (error)
+                                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
+                                        "xfs_qm_dqflock_pushbuf_wait: "
+                                        "pushbuf error %d on dqp %p, bp %p",
+                                        error, dqp, bp);
                } else {
                        xfs_buf_relse(bp);
                }
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1800e8d1f646..36e05ca78412 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -146,6 +146,7 @@ xfs_qm_dquot_logitem_push(
        xfs_dq_logitem_t        *logitem)
 {
        xfs_dquot_t     *dqp;
+        int             error;
        dqp = logitem->qli_dquot;
@@ -161,7 +162,11 @@ xfs_qm_dquot_logitem_push(
         * lock without sleeping, then there must not have been
         * anyone in the process of flushing the dquot.
         */
-        xfs_qm_dqflush(dqp, XFS_B_DELWRI);
+        error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+        if (error)
+                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
+                        "xfs_qm_dquot_logitem_push: push error %d on dqp %p",
+                        error, dqp);
        xfs_dqunlock(dqp);
 }
@@ -262,11 +267,16 @@ xfs_qm_dquot_logitem_pushbuf(
                                              XFS_LOG_FORCE);
                        }
                        if (dopush) {
+                                int     error;
 #ifdef XFSRACEDEBUG
                                delay_for_intr();
                                delay(300);
 #endif
-                                xfs_bawrite(mp, bp);
+                                error = xfs_bawrite(mp, bp);
+                                if (error)
+                                        xfs_fs_cmn_err(CE_WARN, mp,
+        "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
+                                                        error, qip, bp);
                        } else {
                                xfs_buf_relse(bp);
                        }
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8e9c5ae6504d..40ea56409561 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -304,8 +304,11 @@ xfs_qm_unmount_quotadestroy(
 * necessary data structures like quotainfo.  This is also responsible for
 * running a quotacheck as necessary.  We are guaranteed that the superblock
 * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to indicate success or failure at all.
 */
-int
+void
 xfs_qm_mount_quotas(
        xfs_mount_t     *mp,
        int             mfsi_flags)
@@ -313,7 +316,6 @@ xfs_qm_mount_quotas(
        int             error = 0;
        uint            sbf;
        /*
         * If quotas on realtime volumes is not supported, we disable
         * quotas immediately.
@@ -332,7 +334,8 @@ xfs_qm_mount_quotas(
         * Allocate the quotainfo structure inside the mount struct, and
         * create quotainode(s), and change/rev superblock if necessary.
         */
-        if ((error = xfs_qm_init_quotainfo(mp))) {
+        error = xfs_qm_init_quotainfo(mp);
+        if (error) {
                /*
                 * We must turn off quotas.
                 */
@@ -344,12 +347,11 @@ xfs_qm_mount_quotas(
         * If any of the quotas are not consistent, do a quotacheck.
         */
        if (XFS_QM_NEED_QUOTACHECK(mp) &&
-                !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
+            !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
-                if ((error = xfs_qm_quotacheck(mp))) {
+                error = xfs_qm_quotacheck(mp);
-                        /* Quotacheck has failed and quotas have
+                if (error) {
-                         * been disabled.
+                        /* Quotacheck failed and disabled quotas. */
-                         */
+                        return;
-                        return XFS_ERROR(error);
                }
        }
        /* 
@@ -357,12 +359,10 @@ xfs_qm_mount_quotas(
         * quotachecked status, since we won't be doing accounting for
         * that type anymore.
         */
-        if (!XFS_IS_UQUOTA_ON(mp)) {
+        if (!XFS_IS_UQUOTA_ON(mp))
                mp->m_qflags &= ~XFS_UQUOTA_CHKD;
-        }
+        if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
-        if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) {
                mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-        }
 write_changes:
        /*
@@ -392,7 +392,7 @@ xfs_qm_mount_quotas(
                xfs_fs_cmn_err(CE_WARN, mp,
                        "Failed to initialize disk quotas.");
        }
-        return XFS_ERROR(error);
+        return;
 }
 /*
@@ -1438,7 +1438,7 @@ xfs_qm_qino_alloc(
 }
-STATIC int
+STATIC void
 xfs_qm_reset_dqcounts(
        xfs_mount_t     *mp,
        xfs_buf_t       *bp,
@@ -1478,8 +1478,6 @@ xfs_qm_reset_dqcounts(
                ddq->d_rtbwarns = 0;
                ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
        }
-        return 0;
 }
 STATIC int
@@ -1520,7 +1518,7 @@ xfs_qm_dqiter_bufs(
                if (error)
                        break;
-                (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
+                xfs_qm_reset_dqcounts(mp, bp, firstid, type);
                xfs_bdwrite(mp, bp);
                /*
                 * goto the next block.
@@ -1810,7 +1808,7 @@ xfs_qm_dqusage_adjust(
         * Now release the inode. This will send it to 'inactive', and
         * possibly even free blocks.
         */
-        VN_RELE(XFS_ITOV(ip));
+        IRELE(ip);
        /*
         * Goto next inode.
@@ -1880,6 +1878,14 @@ xfs_qm_quotacheck(
        } while (! done);
        /*
+         * We've made all the changes that we need to make incore.
+         * Flush them down to disk buffers if everything was updated
+         * successfully.
+         */
+        if (!error)
+                error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
+        /*
         * We can get this error if we couldn't do a dquot allocation inside
         * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
         * dirty dquots that might be cached, we just want to get rid of them
@@ -1890,11 +1896,6 @@ xfs_qm_quotacheck(
                xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
                goto error_return;
        }
-        /*
-         * We've made all the changes that we need to make incore.
-         * Now flush_them down to disk buffers.
-         */
-        xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
        /*
         * We didn't log anything, because if we crashed, we'll have to
@@ -1926,7 +1927,10 @@ xfs_qm_quotacheck(
                ASSERT(mp->m_quotainfo != NULL);
                ASSERT(xfs_Gqm != NULL);
                xfs_qm_destroy_quotainfo(mp);
-                (void)xfs_mount_reset_sbqflags(mp);
+                if (xfs_mount_reset_sbqflags(mp)) {
+                        cmn_err(CE_WARN, "XFS quotacheck %s: "
+                                "Failed to reset quota flags.", mp->m_fsname);
+                }
        } else {
                cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
        }
@@ -1968,7 +1972,7 @@ xfs_qm_init_quotainos(
                        if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
                                             0, 0, &gip, 0))) {
                                if (uip)
-                                        VN_RELE(XFS_ITOV(uip));
+                                        IRELE(uip);
                                return XFS_ERROR(error);
                        }
                }
@@ -1999,7 +2003,7 @@ xfs_qm_init_quotainos(
                                          sbflags | XFS_SB_GQUOTINO, flags);
                if (error) {
                        if (uip)
-                                VN_RELE(XFS_ITOV(uip));
+                                IRELE(uip);
                        return XFS_ERROR(error);
                }
@@ -2093,12 +2097,17 @@ xfs_qm_shake_freelist(
                 * dirty dquots.
                 */
                if (XFS_DQ_IS_DIRTY(dqp)) {
+                        int     error;
                        xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
                        /*
                         * We flush it delayed write, so don't bother
                         * releasing the mplock.
                         */
-                        (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+                        error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+                        if (error) {
+                                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
+                        "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
+                        }
                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
                        dqp = dqp->dq_flnext;
                        continue;
@@ -2265,12 +2274,17 @@ xfs_qm_dqreclaim_one(void)
                 * dirty dquots.
                 */
                if (XFS_DQ_IS_DIRTY(dqp)) {
+                        int     error;
                        xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
                        /*
                         * We flush it delayed write, so don't bother
                         * releasing the freelist lock.
                         */
-                        (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+                        error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+                        if (error) {
+                                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
+                        "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
+                        }
                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
                        continue;
                }
@@ -2378,9 +2392,9 @@ xfs_qm_write_sb_changes(
        }
        xfs_mod_sb(tp, flags);
-        (void) xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
-        return 0;
+        return error;
 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index baf537c1c177..cd2300e374af 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_RELE(xqm)        ((xqm)->qm_nrefs--)
 extern void             xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int              xfs_qm_mount_quotas(xfs_mount_t *, int);
+extern void             xfs_qm_mount_quotas(xfs_mount_t *, int);
 extern int              xfs_qm_quotacheck(xfs_mount_t *);
 extern void             xfs_qm_unmount_quotadestroy(xfs_mount_t *);
 extern int              xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
index a50ffabcf554..5b964fc0dc09 100644
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ b/fs/xfs/quota/xfs_qm_stats.h
@@ -45,8 +45,8 @@ extern void xfs_qm_cleanup_procfs(void);
 # define XQM_STATS_INC(count)   do { } while (0)
-static __inline void xfs_qm_init_procfs(void) { };
+static inline void xfs_qm_init_procfs(void) { };
-static __inline void xfs_qm_cleanup_procfs(void) { };
+static inline void xfs_qm_cleanup_procfs(void) { };
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index d2b8be7e75f9..8342823dbdc3 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -279,9 +279,12 @@ xfs_qm_scall_quotaoff(
        /*
         * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-         * and synchronously.
+         * and synchronously. If we fail to write, we should abort the
+         * operation as it cannot be recovered safely if we crash.
         */
-        xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+        error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+        if (error)
+                goto out_error;
        /*
         * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
@@ -337,7 +340,12 @@ xfs_qm_scall_quotaoff(
         * So, we have QUOTAOFF start and end logitems; the start
         * logitem won't get overwritten until the end logitem appears...
         */
-        xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+        error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+        if (error) {
+                /* We're screwed now. Shutdown is the only option. */
+                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                goto out_error;
+        }
        /*
         * If quotas is completely disabled, close shop.
@@ -361,6 +369,7 @@ xfs_qm_scall_quotaoff(
                XFS_PURGE_INODE(XFS_QI_GQIP(mp));
                XFS_QI_GQIP(mp) = NULL;
        }
+out_error:
        mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
        return (error);
@@ -371,12 +380,11 @@ xfs_qm_scall_trunc_qfiles(
        xfs_mount_t     *mp,
        uint            flags)
 {
-        int             error;
+        int             error = 0, error2 = 0;
        xfs_inode_t     *qip;
        if (!capable(CAP_SYS_ADMIN))
                return XFS_ERROR(EPERM);
-        error = 0;
        if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
                qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
                return XFS_ERROR(EINVAL);
@@ -384,22 +392,22 @@ xfs_qm_scall_trunc_qfiles(
        if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
                error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
-                if (! error) {
+                if (!error) {
-                        (void) xfs_truncate_file(mp, qip);
+                        error = xfs_truncate_file(mp, qip);
-                        VN_RELE(XFS_ITOV(qip));
+                        IRELE(qip);
                }
        }
        if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
            mp->m_sb.sb_gquotino != NULLFSINO) {
-                error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
+                error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
-                if (! error) {
+                if (!error2) {
-                        (void) xfs_truncate_file(mp, qip);
+                        error2 = xfs_truncate_file(mp, qip);
-                        VN_RELE(XFS_ITOV(qip));
+                        IRELE(qip);
                }
        }
-        return (error);
+        return error ? error : error2;
 }
@@ -552,13 +560,13 @@ xfs_qm_scall_getqstat(
                out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
                out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
                if (tempuqip)
-                        VN_RELE(XFS_ITOV(uip));
+                        IRELE(uip);
        }
        if (gip) {
                out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
                out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
                if (tempgqip)
-                        VN_RELE(XFS_ITOV(gip));
+                        IRELE(gip);
        }
        if (mp->m_quotainfo) {
                out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
@@ -726,12 +734,12 @@ xfs_qm_scall_setqlim(
        xfs_trans_log_dquot(tp, dqp);
        xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
-        xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
        xfs_qm_dqprint(dqp);
        xfs_qm_dqrele(dqp);
        mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-        return (0);
+        return error;
 }
 STATIC int
@@ -1095,7 +1103,7 @@ again:
                 * inactive code in hell.
                 */
                if (vnode_refd)
-                        VN_RELE(vp);
+                        IRELE(ip);
                XFS_MOUNT_ILOCK(mp);
                /*
                 * If an inode was inserted or removed, we gotta
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 129067cfcb86..0b75d302508f 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -24,7 +24,7 @@ static int          ktrace_zentries;
 void __init
 ktrace_init(int zentries)
 {
-        ktrace_zentries = zentries;
+        ktrace_zentries = roundup_pow_of_two(zentries);
        ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
                                        "ktrace_hdr");
@@ -47,13 +47,16 @@ ktrace_uninit(void)
 * ktrace_alloc()
 *
 * Allocate a ktrace header and enough buffering for the given
- * number of entries.
+ * number of entries. Round the number of entries up to a
+ * power of 2 so we can do fast masking to get the index from
+ * the atomic index counter.
 */
 ktrace_t *
 ktrace_alloc(int nentries, unsigned int __nocast sleep)
 {
        ktrace_t        *ktp;
        ktrace_entry_t  *ktep;
+        int             entries;
        ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
@@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
        /*
         * Special treatment for buffers with the ktrace_zentries entries
         */
-        if (nentries == ktrace_zentries) {
+        entries = roundup_pow_of_two(nentries);
+        if (entries == ktrace_zentries) {
                ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
                                                            sleep);
        } else {
-                ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
+                ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
                                                            sleep | KM_LARGE);
        }
@@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
        }
        ktp->kt_entries  = ktep;
-        ktp->kt_nentries = nentries;
+        ktp->kt_nentries = entries;
-        ktp->kt_index    = 0;
+        ASSERT(is_power_of_2(entries));
+        ktp->kt_index_mask = entries - 1;
+        atomic_set(&ktp->kt_index, 0);
        ktp->kt_rollover = 0;
        return ktp;
 }
@@ -151,8 +157,6 @@ ktrace_enter(
        void            *val14,
        void            *val15)
 {
-        static DEFINE_SPINLOCK(wrap_lock);
-        unsigned long   flags;
        int             index;
        ktrace_entry_t  *ktep;
@@ -161,12 +165,8 @@ ktrace_enter(
        /*
         * Grab an entry by pushing the index up to the next one.
         */
-        spin_lock_irqsave(&wrap_lock, flags);
+        index = atomic_add_return(1, &ktp->kt_index);
-        index = ktp->kt_index;
+        index = (index - 1) & ktp->kt_index_mask;
-        if (++ktp->kt_index == ktp->kt_nentries)
-                ktp->kt_index = 0;
-        spin_unlock_irqrestore(&wrap_lock, flags);
        if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
                ktp->kt_rollover = 1;
@@ -199,11 +199,12 @@ int
 ktrace_nentries(
        ktrace_t        *ktp)
 {
-        if (ktp == NULL) {
+        int     index;
+        if (ktp == NULL)
                return 0;
-        }
-        return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index);
+        index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
+        return (ktp->kt_rollover ? ktp->kt_nentries : index);
 }
 /*
@@ -228,7 +229,7 @@ ktrace_first(ktrace_t   *ktp, ktrace_snap_t     *ktsp)
        int             nentries;
        if (ktp->kt_rollover)
-                index = ktp->kt_index;
+                index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
        else
                index = 0;
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
index 56e72b40a859..741d6947ca60 100644
--- a/fs/xfs/support/ktrace.h
+++ b/fs/xfs/support/ktrace.h
@@ -30,7 +30,8 @@ typedef struct ktrace_entry {
 */
 typedef struct ktrace {
        int             kt_nentries;    /* number of entries in trace buf */
-        int             kt_index;       /* current index in entries */
+        atomic_t        kt_index;       /* current index in entries */
+        unsigned int    kt_index_mask;
        int             kt_rollover;
        ktrace_entry_t  *kt_entries;    /* buffer of entries */
 } ktrace_t;
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..765aaf65e2d3 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
 #define STATIC
 #define DEBUG 1
 #define XFS_BUF_LOCK_TRACKING 1
-/* #define QUOTADEBUG 1 */
+#define QUOTADEBUG 1
 #endif
 #ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 7272fe39a92d..8e130b9720ae 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -307,12 +307,13 @@ xfs_acl_vset(
        VN_HOLD(vp);
        error = xfs_acl_allow_set(vp, kind);
-        if (error)
-                goto out;
        /* Incoming ACL exists, set file mode based on its value */
-        if (kind == _ACL_TYPE_ACCESS)
+        if (!error && kind == _ACL_TYPE_ACCESS)
-                xfs_acl_setmode(vp, xfs_acl, &basicperms);
+                error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
+        if (error)
+                goto out;
        /*
         * If we have more than std unix permissions, set up the actual attr.
@@ -323,7 +324,7 @@ xfs_acl_vset(
        if (!basicperms) {
                xfs_acl_set_attr(vp, xfs_acl, kind, &error);
        } else {
-                xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
+                error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
        }
 out:
@@ -707,7 +708,9 @@ xfs_acl_inherit(
        memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
        xfs_acl_filter_mode(mode, cacl);
-        xfs_acl_setmode(vp, cacl, &basicperms);
+        error = xfs_acl_setmode(vp, cacl, &basicperms);
+        if (error)
+                goto out_error;
        /*
         * Set the Default and Access ACL on the file.  The mode is already
@@ -720,6 +723,7 @@ xfs_acl_inherit(
                xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
        if (!error && !basicperms)
                xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
+out_error:
        _ACL_FREE(cacl);
        return error;
 }
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index bdbfbbee4959..1956f83489f1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -45,7 +45,7 @@
 #define XFSA_FIXUP_BNO_OK       1
 #define XFSA_FIXUP_CNT_OK       2
-STATIC int
+STATIC void
 xfs_alloc_search_busy(xfs_trans_t *tp,
                    xfs_agnumber_t agno,
                    xfs_agblock_t bno,
@@ -55,24 +55,24 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 ktrace_t *xfs_alloc_trace_buf;
 #define TRACE_ALLOC(s,a)        \
-        xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
+        xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
 #define TRACE_FREE(s,a,b,x,f)   \
-        xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
+        xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
 #define TRACE_MODAGF(s,a,f)     \
-        xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
+        xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
-#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp)       \
+#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp)   \
-        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
+        xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp)   \
+#define TRACE_UNBUSY(__func__,s,ag,sl,tp)       \
-        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
+        xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \
+#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp)        \
-        xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
+        xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
 #else
 #define TRACE_ALLOC(s,a)
 #define TRACE_FREE(s,a,b,x,f)
 #define TRACE_MODAGF(s,a,f)
 #define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
 #define TRACE_UNBUSY(fname,s,ag,sl,tp)
-#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)
+#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
 #endif  /* XFS_ALLOC_TRACE */
 /*
@@ -93,7 +93,7 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
-STATIC int                              /* success (>= minlen) */
+STATIC void
 xfs_alloc_compute_aligned(
        xfs_agblock_t   foundbno,       /* starting block in found extent */
        xfs_extlen_t    foundlen,       /* length in found extent */
@@ -116,7 +116,6 @@ xfs_alloc_compute_aligned(
        }
        *resbno = bno;
        *reslen = len;
-        return len >= minlen;
 }
 /*
@@ -837,9 +836,9 @@ xfs_alloc_ag_vextent_near(
                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                        if (!xfs_alloc_compute_aligned(ltbno, ltlen,
+                        xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
-                                        args->alignment, args->minlen,
+                                        args->minlen, &ltbnoa, &ltlena);
-                                        &ltbnoa, &ltlena))
+                        if (ltlena < args->minlen)
                                continue;
                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                        xfs_alloc_fix_len(args);
@@ -958,9 +957,9 @@ xfs_alloc_ag_vextent_near(
                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                        if (xfs_alloc_compute_aligned(ltbno, ltlen,
+                        xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
-                                        args->alignment, args->minlen,
+                                        args->minlen, &ltbnoa, &ltlena);
-                                        &ltbnoa, &ltlena))
+                        if (ltlena >= args->minlen)
                                break;
                        if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
                                goto error0;
@@ -974,9 +973,9 @@ xfs_alloc_ag_vextent_near(
                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                        if (xfs_alloc_compute_aligned(gtbno, gtlen,
+                        xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
-                                        args->alignment, args->minlen,
+                                        args->minlen, &gtbnoa, &gtlena);
-                                        &gtbnoa, &gtlena))
+                        if (gtlena >= args->minlen)
                                break;
                        if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
                                goto error0;
@@ -2562,9 +2561,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 /*
- * returns non-zero if any of (agno,bno):len is in a busy list
+ * If we find the extent in the busy list, force the log out to get the
+ * extent out of the busy list so the caller can use it straight away.
 */
-STATIC int
+STATIC void
 xfs_alloc_search_busy(xfs_trans_t *tp,
                    xfs_agnumber_t agno,
                    xfs_agblock_t bno,
@@ -2572,7 +2572,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 {
        xfs_mount_t             *mp;
        xfs_perag_busy_t        *bsy;
-        int                     n;
        xfs_agblock_t           uend, bend;
        xfs_lsn_t               lsn;
        int                     cnt;
@@ -2585,21 +2584,18 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
        uend = bno + len - 1;
        /* search pagb_list for this slot, skipping open slots */
-        for (bsy = mp->m_perag[agno].pagb_list, n = 0;
+        for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
-             cnt; bsy++, n++) {
                /*
                 * (start1,length1) within (start2, length2)
                 */
                if (bsy->busy_tp != NULL) {
                        bend = bsy->busy_start + bsy->busy_length - 1;
-                        if ((bno > bend) ||
+                        if ((bno > bend) || (uend < bsy->busy_start)) {
-                            (uend < bsy->busy_start)) {
                                cnt--;
                        } else {
                                TRACE_BUSYSEARCH("xfs_alloc_search_busy",
-                                                 "found1", agno, bno, len, n,
+                                         "found1", agno, bno, len, tp);
-                                                 tp);
                                break;
                        }
                }
@@ -2610,15 +2606,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
         * transaction that freed the block
         */
        if (cnt) {
-                TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
+                TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
                lsn = bsy->busy_tp->t_commit_lsn;
                spin_unlock(&mp->m_perag[agno].pagb_lock);
                xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
        } else {
-                TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
+                TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
-                n = -1;
                spin_unlock(&mp->m_perag[agno].pagb_lock);
        }
-        return n;
 }
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index e58f321fdae9..36d781ee5fcc 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2647,14 +2647,6 @@ attr_trusted_capable(
 }
 STATIC int
-attr_secure_capable(
-        bhv_vnode_t     *vp,
-        cred_t          *cred)
-{
-        return -ENOSECURITY;
-}
-STATIC int
 attr_system_set(
        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
@@ -2724,7 +2716,7 @@ struct attrnames attr_secure = {
        .attr_get       = attr_generic_get,
        .attr_set       = attr_generic_set,
        .attr_remove    = attr_generic_remove,
-        .attr_capable   = attr_secure_capable,
+        .attr_capable   = (attrcapable_t)fs_noerr,
 };
 struct attrnames attr_user = {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 96ba6aa4ed8c..303d41e4217b 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -166,7 +166,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
        if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
                if (bytes <= XFS_IFORK_ASIZE(dp))
-                        return mp->m_attroffset >> 3;
+                        return dp->i_d.di_forkoff;
                return 0;
        }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 2def273855a2..eb198c01c35d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -323,13 +323,13 @@ xfs_bmap_trace_pre_update(
        int             whichfork);     /* data or attr fork */
 #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)       \
-        xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
+        xfs_bmap_trace_delete(__func__,d,ip,i,c,w)
 #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
-        xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
+        xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w)
 #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)    \
-        xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
+        xfs_bmap_trace_post_update(__func__,d,ip,i,w)
 #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)     \
-        xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
+        xfs_bmap_trace_pre_update(__func__,d,ip,i,w)
 #else
 #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
 #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
@@ -2402,7 +2402,7 @@ xfs_bmap_extsize_align(
 #define XFS_ALLOC_GAP_UNITS     4
-STATIC int
+STATIC void
 xfs_bmap_adjacent(
        xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
 {
@@ -2548,7 +2548,6 @@ xfs_bmap_adjacent(
                        ap->rval = gotbno;
        }
 #undef ISVALID
-        return 0;
 }
 STATIC int
@@ -4154,16 +4153,21 @@ xfs_bmap_compute_maxlevels(
         * number of leaf entries, is controlled by the type of di_nextents
         * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
         * (a signed 16-bit number, xfs_aextnum_t).
+         *
+         * Note that we can no longer assume that if we are in ATTR1 that
+         * the fork offset of all the inodes will be (m_attroffset >> 3)
+         * because we could have mounted with ATTR2 and then mounted back
+         * with ATTR1, keeping the di_forkoff's fixed but probably at
+         * various positions. Therefore, for both ATTR1 and ATTR2
+         * we have to assume the worst case scenario of a minimum size
+         * available.
         */
        if (whichfork == XFS_DATA_FORK) {
                maxleafents = MAXEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+                sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
-                        XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
        } else {
                maxleafents = MAXAEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+                sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
-                        XFS_BMDR_SPACE_CALC(MINABTPTRS) :
-                        mp->m_sb.sb_inodesize - mp->m_attroffset;
        }
        maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
        minleafrecs = mp->m_bmap_dmnr[0];
@@ -5772,7 +5776,6 @@ xfs_getbmap(
        int                     error;          /* return value */
        __int64_t               fixlen;         /* length for -1 case */
        int                     i;              /* extent number */
-        bhv_vnode_t             *vp;            /* corresponding vnode */
        int                     lock;           /* lock state */
        xfs_bmbt_irec_t         *map;           /* buffer for user's data */
        xfs_mount_t             *mp;            /* file system mount point */
@@ -5789,7 +5792,6 @@ xfs_getbmap(
        int                     bmapi_flags;    /* flags for xfs_bmapi */
        __int32_t               oflags;         /* getbmapx bmv_oflags field */
-        vp = XFS_ITOV(ip);
        mp = ip->i_mount;
        whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -5811,7 +5813,7 @@ xfs_getbmap(
        if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
            DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
            whichfork == XFS_DATA_FORK) {
-                error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
+                error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
                if (error)
                        return XFS_ERROR(error);
        }
@@ -5869,6 +5871,10 @@ xfs_getbmap(
                /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
                error = xfs_flush_pages(ip, (xfs_off_t)0,
                                               -1, 0, FI_REMAPF);
+                if (error) {
+                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+                return error;
+                }
        }
        ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
@@ -6162,10 +6168,10 @@ xfs_check_block(
                        }
                        if (*thispa == *pp) {
                                cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
-                                        __FUNCTION__, j, i,
+                                        __func__, j, i,
                                        (unsigned long long)be64_to_cpu(*thispa));
                                panic("%s: ptrs are equal in node\n",
-                                        __FUNCTION__);
+                                        __func__);
                        }
                }
        }
@@ -6192,7 +6198,7 @@ xfs_bmap_check_leaf_extents(
        xfs_mount_t             *mp;    /* file system mount structure */
        __be64                  *pp;    /* pointer to block address */
        xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
-        xfs_bmbt_rec_t          *lastp; /* pointer to previous extent */
+        xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
        xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
        int                     bp_release = 0;
@@ -6262,7 +6268,6 @@ xfs_bmap_check_leaf_extents(
        /*
         * Loop over all leaf nodes checking that all extents are in the right order.
         */
-        lastp = NULL;
        for (;;) {
                xfs_fsblock_t   nextbno;
                xfs_extnum_t    num_recs;
@@ -6283,18 +6288,16 @@ xfs_bmap_check_leaf_extents(
                 */
                ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+                if (i) {
+                        xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep);
+                }
                for (j = 1; j < num_recs; j++) {
                        nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
-                        if (lastp) {
+                        xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp);
-                                xfs_btree_check_rec(XFS_BTNUM_BMAP,
-                                        (void *)lastp, (void *)ep);
-                        }
-                        xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
-                                (void *)(nextp));
-                        lastp = ep;
                        ep = nextp;
                }
+                last = *ep;
                i += num_recs;
                if (bp_release) {
                        bp_release = 0;
@@ -6325,13 +6328,13 @@ xfs_bmap_check_leaf_extents(
        return;
 error0:
-        cmn_err(CE_WARN, "%s: at error0", __FUNCTION__);
+        cmn_err(CE_WARN, "%s: at error0", __func__);
        if (bp_release)
                xfs_trans_brelse(NULL, bp);
 error_norelse:
        cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
-                __FUNCTION__, i);
+                __func__, i);
-        panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__);
+        panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
        return;
 }
 #endif
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 87224b7d7984..6ff70cda451c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -151,7 +151,7 @@ xfs_bmap_trace_exlist(
        xfs_extnum_t            cnt,            /* count of entries in list */
        int                     whichfork);     /* data or attr fork */
 #define XFS_BMAP_TRACE_EXLIST(ip,c,w)   \
-        xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
+        xfs_bmap_trace_exlist(__func__,ip,c,w)
 #else
 #define XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bd18987326a3..4f0e849d973e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -275,21 +275,21 @@ xfs_bmbt_trace_cursor(
 }
 #define XFS_BMBT_TRACE_ARGBI(c,b,i)     \
-        xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
+        xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j)  \
-        xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
+        xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
 #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)       \
-        xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
+        xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
 #define XFS_BMBT_TRACE_ARGI(c,i)        \
-        xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
+        xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
 #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)  \
-        xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
+        xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
 #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)  \
-        xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
+        xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
 #define XFS_BMBT_TRACE_ARGIK(c,i,k)     \
-        xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
+        xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
 #define XFS_BMBT_TRACE_CURSOR(c,s)      \
-        xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
+        xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
 #else
 #define XFS_BMBT_TRACE_ARGBI(c,b,i)
 #define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -2027,6 +2027,24 @@ xfs_bmbt_increment(
 /*
 * Insert the current record at the point referenced by cur.
+ *
+ * A multi-level split of the tree on insert will invalidate the original
+ * cursor. It appears, however, that some callers assume that the cursor is
+ * always valid. Hence if we do a multi-level split we need to revalidate the
+ * cursor.
+ *
+ * When a split occurs, we will see a new cursor returned. Use that as a
+ * trigger to determine if we need to revalidate the original cursor. If we get
+ * a split, then use the original irec to look up the path of the record we
+ * just inserted.
+ *
+ * Note that the fact that the btree root is in the inode means that we can
+ * have the level of the tree change without a "split" occurring at the root
+ * level. What happens is that the root is migrated to an allocated block and
+ * the inode root is pointed to it. This means a single split can change the
+ * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
+ * the level change should be accounted as a split so as to correctly trigger a
+ * revalidation of the old cursor.
 */
 int                                     /* error */
 xfs_bmbt_insert(
@@ -2039,11 +2057,14 @@ xfs_bmbt_insert(
        xfs_fsblock_t   nbno;
        xfs_btree_cur_t *ncur;
        xfs_bmbt_rec_t  nrec;
+        xfs_bmbt_irec_t oirec;          /* original irec */
        xfs_btree_cur_t *pcur;
+        int             splits = 0;
        XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
        level = 0;
        nbno = NULLFSBLOCK;
+        oirec = cur->bc_rec.b;
        xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
        ncur = NULL;
        pcur = cur;
@@ -2052,11 +2073,13 @@ xfs_bmbt_insert(
                                &i))) {
                        if (pcur != cur)
                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
-                        XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                        goto error0;
-                        return error;
                }
                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
+                        /* allocating a new root is effectively a split */
+                        if (cur->bc_nlevels != pcur->bc_nlevels)
+                                splits++;
                        cur->bc_nlevels = pcur->bc_nlevels;
                        cur->bc_private.b.allocated +=
                                pcur->bc_private.b.allocated;
@@ -2070,10 +2093,21 @@ xfs_bmbt_insert(
                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
                }
                if (ncur) {
+                        splits++;
                        pcur = ncur;
                        ncur = NULL;
                }
        } while (nbno != NULLFSBLOCK);
+        if (splits > 1) {
+                /* revalidate the old cursor as we had a multi-level split */
+                error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
+                                oirec.br_startblock, oirec.br_blockcount, &i);
+                if (error)
+                        goto error0;
+                ASSERT(i == 1);
+        }
        XFS_BMBT_TRACE_CURSOR(cur, EXIT);
        *stat = i;
        return 0;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 63debd147eb5..53a71c62025d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -645,7 +645,12 @@ xfs_buf_item_push(
        bp = bip->bli_buf;
        if (XFS_BUF_ISDELAYWRITE(bp)) {
-                xfs_bawrite(bip->bli_item.li_mountp, bp);
+                int     error;
+                error = xfs_bawrite(bip->bli_item.li_mountp, bp);
+                if (error)
+                        xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp,
+                        "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p",
+                                        error, bip, bp);
        } else {
                xfs_buf_relse(bp);
        }
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index e92e73f0e6af..7cb26529766b 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -44,6 +44,7 @@
 #include "xfs_error.h"
 #include "xfs_vnodeops.h"
+struct xfs_name xfs_name_dotdot = {"..", 2};
 void
 xfs_dir_mount(
@@ -146,8 +147,7 @@ int
 xfs_dir_createname(
        xfs_trans_t             *tp,
        xfs_inode_t             *dp,
-        char                    *name,
+        struct xfs_name         *name,
-        int                     namelen,
        xfs_ino_t               inum,           /* new entry inode number */
        xfs_fsblock_t           *first,         /* bmap's firstblock */
        xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
@@ -162,9 +162,9 @@ xfs_dir_createname(
                return rval;
        XFS_STATS_INC(xs_dir_create);
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name, namelen);
+        args.hashval = xfs_da_hashname(name->name, name->len);
        args.inumber = inum;
        args.dp = dp;
        args.firstblock = first;
@@ -197,8 +197,7 @@ int
 xfs_dir_lookup(
        xfs_trans_t     *tp,
        xfs_inode_t     *dp,
-        char            *name,
+        struct xfs_name *name,
-        int             namelen,
        xfs_ino_t       *inum)          /* out: inode number */
 {
        xfs_da_args_t   args;
@@ -207,18 +206,14 @@ xfs_dir_lookup(
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
        XFS_STATS_INC(xs_dir_lookup);
+        memset(&args, 0, sizeof(xfs_da_args_t));
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name, namelen);
+        args.hashval = xfs_da_hashname(name->name, name->len);
-        args.inumber = 0;
        args.dp = dp;
-        args.firstblock = NULL;
-        args.flist = NULL;
-        args.total = 0;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.justcheck = args.addname = 0;
        args.oknoent = 1;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -247,8 +242,7 @@ int
 xfs_dir_removename(
        xfs_trans_t     *tp,
        xfs_inode_t     *dp,
-        char            *name,
+        struct xfs_name *name,
-        int             namelen,
        xfs_ino_t       ino,
        xfs_fsblock_t   *first,         /* bmap's firstblock */
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
@@ -261,9 +255,9 @@ xfs_dir_removename(
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
        XFS_STATS_INC(xs_dir_remove);
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name, namelen);
+        args.hashval = xfs_da_hashname(name->name, name->len);
        args.inumber = ino;
        args.dp = dp;
        args.firstblock = first;
@@ -329,8 +323,7 @@ int
 xfs_dir_replace(
        xfs_trans_t     *tp,
        xfs_inode_t     *dp,
-        char            *name,          /* name of entry to replace */
+        struct xfs_name *name,          /* name of entry to replace */
-        int             namelen,
        xfs_ino_t       inum,           /* new inode number */
        xfs_fsblock_t   *first,         /* bmap's firstblock */
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
@@ -345,9 +338,9 @@ xfs_dir_replace(
        if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                return rval;
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name, namelen);
+        args.hashval = xfs_da_hashname(name->name, name->len);
        args.inumber = inum;
        args.dp = dp;
        args.firstblock = first;
@@ -374,28 +367,29 @@ xfs_dir_replace(
 /*
 * See if this entry can be added to the directory without allocating space.
+ * First checks that the caller couldn't reserve enough space (resblks = 0).
 */
 int
 xfs_dir_canenter(
        xfs_trans_t     *tp,
        xfs_inode_t     *dp,
-        char            *name,          /* name of entry to add */
+        struct xfs_name *name,          /* name of entry to add */
-        int             namelen)
+        uint            resblks)
 {
        xfs_da_args_t   args;
        int             rval;
        int             v;              /* type-checking value */
+        if (resblks)
+                return 0;
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+        memset(&args, 0, sizeof(xfs_da_args_t));
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name, namelen);
+        args.hashval = xfs_da_hashname(name->name, name->len);
-        args.inumber = 0;
        args.dp = dp;
-        args.firstblock = NULL;
-        args.flist = NULL;
-        args.total = 0;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
        args.justcheck = args.addname = args.oknoent = 1;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index b265197e74cf..6392f939029f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -59,6 +59,8 @@ typedef	__uint32_t	xfs_dir2_db_t;
 */
 typedef xfs_off_t       xfs_dir2_off_t;
+extern struct xfs_name  xfs_name_dotdot;
 /*
 * Generic directory interface routines
 */
@@ -68,21 +70,21 @@ extern int xfs_dir_isempty(struct xfs_inode *dp);
 extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
                                struct xfs_inode *pdp);
 extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
-                                char *name, int namelen, xfs_ino_t inum,
+                                struct xfs_name *name, xfs_ino_t inum,
                                xfs_fsblock_t *first,
                                struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
-                                char *name, int namelen, xfs_ino_t *inum);
+                                struct xfs_name *name, xfs_ino_t *inum);
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
-                                char *name, int namelen, xfs_ino_t ino,
+                                struct xfs_name *name, xfs_ino_t ino,
                                xfs_fsblock_t *first,
                                struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
-                                char *name, int namelen, xfs_ino_t inum,
+                                struct xfs_name *name, xfs_ino_t inum,
                                xfs_fsblock_t *first,
                                struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
-                                char *name, int namelen);
+                                struct xfs_name *name, uint resblks);
 extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
 /*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index eb03eab5ca52..3f3785b10804 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -73,7 +73,7 @@ xfs_filestreams_trace(
 #define TRACE4(mp,t,a0,a1,a2,a3)        TRACE6(mp,t,a0,a1,a2,a3,0,0)
 #define TRACE5(mp,t,a0,a1,a2,a3,a4)     TRACE6(mp,t,a0,a1,a2,a3,a4,0)
 #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
-        xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
+        xfs_filestreams_trace(mp, t, __func__, __LINE__, \
                                (__psunsigned_t)a0, (__psunsigned_t)a1, \
                                (__psunsigned_t)a2, (__psunsigned_t)a3, \
                                (__psunsigned_t)a4, (__psunsigned_t)a5)
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5a146cb22980..a64dfbd565a5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -107,6 +107,16 @@ xfs_ialloc_log_di(
 /*
 * Allocation group level functions.
 */
+static inline int
+xfs_ialloc_cluster_alignment(
+        xfs_alloc_arg_t *args)
+{
+        if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
+            args->mp->m_sb.sb_inoalignmt >=
+             XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
+                return args->mp->m_sb.sb_inoalignmt;
+        return 1;
+}
 /*
 * Allocate new inodes in the allocation group specified by agbp.
@@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc(
                args.mod = args.total = args.wasdel = args.isfl =
                        args.userdata = args.minalignslop = 0;
                args.prod = 1;
-                args.alignment = 1;
                /*
-                 * Allow space for the inode btree to split.
+                 * We need to take into account alignment here to ensure that
+                 * we don't modify the free list if we fail to have an exact
+                 * block. If we don't have an exact match, and every other
+                 * allocation attempt fails, we'll end up cancelling
+                 * a dirty transaction and shutting down.
+                 *
+                 * For an exact allocation, alignment must be 1,
+                 * however we need to take cluster alignment into account when
+                 * fixing up the freelist. Use the minalignslop field to
+                 * indicate that extra blocks might be required for alignment,
+                 * but not to use them in the actual exact allocation.
                 */
+                args.alignment = 1;
+                args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
+                /* Allow space for the inode btree to split. */
                args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
@@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc(
                        ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
                        args.alignment = args.mp->m_dalign;
                        isaligned = 1;
-                } else if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
+                } else
-                           args.mp->m_sb.sb_inoalignmt >=
+                        args.alignment = xfs_ialloc_cluster_alignment(&args);
-                           XFS_B_TO_FSBT(args.mp,
-                                XFS_INODE_CLUSTER_SIZE(args.mp)))
-                                args.alignment = args.mp->m_sb.sb_inoalignmt;
-                else
-                        args.alignment = 1;
                /*
                 * Need to figure out where to allocate the inode blocks.
                 * Ideally they should be spaced out through the a.g.
@@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc(
                args.agbno = be32_to_cpu(agi->agi_root);
                args.fsbno = XFS_AGB_TO_FSB(args.mp,
                                be32_to_cpu(agi->agi_seqno), args.agbno);
-                if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
+                args.alignment = xfs_ialloc_cluster_alignment(&args);
-                        args.mp->m_sb.sb_inoalignmt >=
-                        XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
-                                args.alignment = args.mp->m_sb.sb_inoalignmt;
-                else
-                        args.alignment = 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
        }
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8e09b71f4104..e657c5128460 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
        xfs_inode_t     *ip;
        xfs_inode_t     *iq;
        int             error;
-        xfs_icluster_t  *icl, *new_icl = NULL;
        unsigned long   first_index, mask;
        xfs_perag_t     *pag;
        xfs_agino_t     agino;
@@ -229,11 +228,9 @@ finish_inode:
        }
        /*
-         * This is a bit messy - we preallocate everything we _might_
+         * Preload the radix tree so we can insert safely under the
-         * need before we pick up the ici lock. That way we don't have to
+         * write spinlock.
-         * juggle locks and go all the way back to the start.
         */
-        new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
        if (radix_tree_preload(GFP_KERNEL)) {
                xfs_idestroy(ip);
                delay(1);
@@ -242,17 +239,6 @@ finish_inode:
        mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
        first_index = agino & mask;
        write_lock(&pag->pag_ici_lock);
-        /*
-         * Find the cluster if it exists
-         */
-        icl = NULL;
-        if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
-                                                        first_index, 1)) {
-                if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
-                        icl = iq->i_cluster;
-        }
        /*
         * insert the new inode
         */
@@ -267,30 +253,13 @@ finish_inode:
        }
        /*
-         * These values _must_ be set before releasing ihlock!
+         * These values _must_ be set before releasing the radix tree lock!
         */
        ip->i_udquot = ip->i_gdquot = NULL;
        xfs_iflags_set(ip, XFS_INEW);
-        ASSERT(ip->i_cluster == NULL);
-        if (!icl) {
-                spin_lock_init(&new_icl->icl_lock);
-                INIT_HLIST_HEAD(&new_icl->icl_inodes);
-                icl = new_icl;
-                new_icl = NULL;
-        } else {
-                ASSERT(!hlist_empty(&icl->icl_inodes));
-        }
-        spin_lock(&icl->icl_lock);
-        hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
-        ip->i_cluster = icl;
-        spin_unlock(&icl->icl_lock);
        write_unlock(&pag->pag_ici_lock);
        radix_tree_preload_end();
-        if (new_icl)
-                kmem_zone_free(xfs_icluster_zone, new_icl);
        /*
         * Link ip to its mount and thread it on the mount's inode list.
@@ -529,18 +498,6 @@ xfs_iextract(
        xfs_put_perag(mp, pag);
        /*
-         * Remove from cluster list
-         */
-        mp = ip->i_mount;
-        spin_lock(&ip->i_cluster->icl_lock);
-        hlist_del(&ip->i_cnode);
-        spin_unlock(&ip->i_cluster->icl_lock);
-        /* was last inode in cluster? */
-        if (hlist_empty(&ip->i_cluster->icl_inodes))
-                kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-        /*
         * Remove from mount's inode list.
         */
        XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f43a6e01d68f..ca12acb90394 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
 /*
 * Used in xfs_itruncate().  This is the maximum number of extents
@@ -126,6 +125,90 @@ xfs_inobp_check(
 #endif
 /*
+ * Find the buffer associated with the given inode map.
+ * We do basic validation checks on the buffer once it has been
+ * retrieved from disk.
+ */
+STATIC int
+xfs_imap_to_bp(
+        xfs_mount_t     *mp,
+        xfs_trans_t     *tp,
+        xfs_imap_t      *imap,
+        xfs_buf_t       **bpp,
+        uint            buf_flags,
+        uint            imap_flags)
+{
+        int             error;
+        int             i;
+        int             ni;
+        xfs_buf_t       *bp;
+        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+                                   (int)imap->im_len, buf_flags, &bp);
+        if (error) {
+                if (error != EAGAIN) {
+                        cmn_err(CE_WARN,
+                                "xfs_imap_to_bp: xfs_trans_read_buf()returned "
+                                "an error %d on %s.  Returning error.",
+                                error, mp->m_fsname);
+                } else {
+                        ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+                }
+                return error;
+        }
+        /*
+         * Validate the magic number and version of every inode in the buffer
+         * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+         */
+#ifdef DEBUG
+        ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
+#else   /* usual case */
+        ni = 1;
+#endif
+        for (i = 0; i < ni; i++) {
+                int             di_ok;
+                xfs_dinode_t    *dip;
+                dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+                                        (i << mp->m_sb.sb_inodelog));
+                di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+                            XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+                                                XFS_ERRTAG_ITOBP_INOTOBP,
+                                                XFS_RANDOM_ITOBP_INOTOBP))) {
+                        if (imap_flags & XFS_IMAP_BULKSTAT) {
+                                xfs_trans_brelse(tp, bp);
+                                return XFS_ERROR(EINVAL);
+                        }
+                        XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
+                                                XFS_ERRLEVEL_HIGH, mp, dip);
+#ifdef DEBUG
+                        cmn_err(CE_PANIC,
+                                        "Device %s - bad inode magic/vsn "
+                                        "daddr %lld #%d (magic=%x)",
+                                XFS_BUFTARG_NAME(mp->m_ddev_targp),
+                                (unsigned long long)imap->im_blkno, i,
+                                be16_to_cpu(dip->di_core.di_magic));
+#endif
+                        xfs_trans_brelse(tp, bp);
+                        return XFS_ERROR(EFSCORRUPTED);
+                }
+        }
+        xfs_inobp_check(mp, bp);
+        /*
+         * Mark the buffer as an inode buffer now that it looks good
+         */
+        XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+        *bpp = bp;
+        return 0;
+}
+/*
 * This routine is called to map an inode number within a file
 * system to the buffer containing the on-disk version of the
 * inode.  It returns a pointer to the buffer containing the
@@ -147,72 +230,19 @@ xfs_inotobp(
        xfs_buf_t       **bpp,
        int             *offset)
 {
-        int             di_ok;
        xfs_imap_t      imap;
        xfs_buf_t       *bp;
        int             error;
-        xfs_dinode_t    *dip;
-        /*
-         * Call the space management code to find the location of the
-         * inode on disk.
-         */
        imap.im_blkno = 0;
        error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
-        if (error != 0) {
+        if (error)
-                cmn_err(CE_WARN,
-        "xfs_inotobp: xfs_imap()  returned an "
-        "error %d on %s.  Returning error.", error, mp->m_fsname);
                return error;
-        }
-        /*
+        error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
-         * If the inode number maps to a block outside the bounds of the
+        if (error)
-         * file system then return NULL rather than calling read_buf
-         * and panicing when we get an error from the driver.
-         */
-        if ((imap.im_blkno + imap.im_len) >
-            XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-                cmn_err(CE_WARN,
-        "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
-        "of the file system %s.  Returning EINVAL.",
-                        (unsigned long long)imap.im_blkno,
-                        imap.im_len, mp->m_fsname);
-                return XFS_ERROR(EINVAL);
-        }
-        /*
-         * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-         * default to just a read_buf() call.
-         */
-        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-                                   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-        if (error) {
-                cmn_err(CE_WARN,
-        "xfs_inotobp: xfs_trans_read_buf()  returned an "
-        "error %d on %s.  Returning error.", error, mp->m_fsname);
                return error;
-        }
-        dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
-        di_ok =
-                be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-                XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-        if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
-                        XFS_RANDOM_ITOBP_INOTOBP))) {
-                XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
-                xfs_trans_brelse(tp, bp);
-                cmn_err(CE_WARN,
-        "xfs_inotobp: XFS_TEST_ERROR()  returned an "
-        "error on %s.  Returning EFSCORRUPTED.",  mp->m_fsname);
-                return XFS_ERROR(EFSCORRUPTED);
-        }
-        xfs_inobp_check(mp, bp);
-        /*
-         * Set *dipp to point to the on-disk inode in the buffer.
-         */
        *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
        *bpp = bp;
        *offset = imap.im_boffset;
@@ -248,46 +278,21 @@ xfs_itobp(
        xfs_dinode_t    **dipp,
        xfs_buf_t       **bpp,
        xfs_daddr_t     bno,
-        uint            imap_flags)
+        uint            imap_flags,
+        uint            buf_flags)
 {
        xfs_imap_t      imap;
        xfs_buf_t       *bp;
        int             error;
-        int             i;
-        int             ni;
        if (ip->i_blkno == (xfs_daddr_t)0) {
-                /*
-                 * Call the space management code to find the location of the
-                 * inode on disk.
-                 */
                imap.im_blkno = bno;
-                if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
+                error = xfs_imap(mp, tp, ip->i_ino, &imap,
-                                        XFS_IMAP_LOOKUP | imap_flags)))
+                                        XFS_IMAP_LOOKUP | imap_flags);
+                if (error)
                        return error;
                /*
-                 * If the inode number maps to a block outside the bounds
-                 * of the file system then return NULL rather than calling
-                 * read_buf and panicing when we get an error from the
-                 * driver.
-                 */
-                if ((imap.im_blkno + imap.im_len) >
-                    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-#ifdef DEBUG
-                        xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-                                        "(imap.im_blkno (0x%llx) "
-                                        "+ imap.im_len (0x%llx)) > "
-                                        " XFS_FSB_TO_BB(mp, "
-                                        "mp->m_sb.sb_dblocks) (0x%llx)",
-                                        (unsigned long long) imap.im_blkno,
-                                        (unsigned long long) imap.im_len,
-                                        XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-#endif /* DEBUG */
-                        return XFS_ERROR(EINVAL);
-                }
-                /*
                 * Fill in the fields in the inode that will be used to
                 * map the inode to its buffer from now on.
                 */
@@ -305,76 +310,17 @@ xfs_itobp(
        }
        ASSERT(bno == 0 || bno == imap.im_blkno);
-        /*
+        error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
-         * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
+        if (error)
-         * default to just a read_buf() call.
-         */
-        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-                                   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-        if (error) {
-#ifdef DEBUG
-                xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-                                "xfs_trans_read_buf() returned error %d, "
-                                "imap.im_blkno 0x%llx, imap.im_len 0x%llx",
-                                error, (unsigned long long) imap.im_blkno,
-                                (unsigned long long) imap.im_len);
-#endif /* DEBUG */
                return error;
-        }
-        /*
-         * Validate the magic number and version of every inode in the buffer
-         * (if DEBUG kernel) or the first inode in the buffer, otherwise.
-         * No validation is done here in userspace (xfs_repair).
-         */
-#if !defined(__KERNEL__)
-        ni = 0;
-#elif defined(DEBUG)
-        ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
-#else   /* usual case */
-        ni = 1;
-#endif
-        for (i = 0; i < ni; i++) {
-                int             di_ok;
-                xfs_dinode_t    *dip;
-                dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+        if (!bp) {
-                                        (i << mp->m_sb.sb_inodelog));
+                ASSERT(buf_flags & XFS_BUF_TRYLOCK);
-                di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+                ASSERT(tp == NULL);
-                            XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+                *bpp = NULL;
-                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+                return EAGAIN;
-                                                XFS_ERRTAG_ITOBP_INOTOBP,
-                                                XFS_RANDOM_ITOBP_INOTOBP))) {
-                        if (imap_flags & XFS_IMAP_BULKSTAT) {
-                                xfs_trans_brelse(tp, bp);
-                                return XFS_ERROR(EINVAL);
-                        }
-#ifdef DEBUG
-                        cmn_err(CE_ALERT,
-                                        "Device %s - bad inode magic/vsn "
-                                        "daddr %lld #%d (magic=%x)",
-                                XFS_BUFTARG_NAME(mp->m_ddev_targp),
-                                (unsigned long long)imap.im_blkno, i,
-                                be16_to_cpu(dip->di_core.di_magic));
-#endif
-                        XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
-                                             mp, dip);
-                        xfs_trans_brelse(tp, bp);
-                        return XFS_ERROR(EFSCORRUPTED);
-                }
        }
-        xfs_inobp_check(mp, bp);
-        /*
-         * Mark the buffer as an inode buffer now that it looks good
-         */
-        XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-        /*
-         * Set *dipp to point to the on-disk inode in the buffer.
-         */
        *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
        *bpp = bp;
        return 0;
@@ -878,7 +824,7 @@ xfs_iread(
         * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
         * know that this is a new incore inode.
         */
-        error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
+        error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
        if (error) {
                kmem_zone_free(xfs_inode_zone, ip);
                return error;
@@ -1518,51 +1464,50 @@ xfs_itruncate_start(
 }
 /*
- * Shrink the file to the given new_size.  The new
+ * Shrink the file to the given new_size.  The new size must be smaller than
- * size must be smaller than the current size.
+ * the current size.  This will free up the underlying blocks in the removed
- * This will free up the underlying blocks
+ * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
- * in the removed range after a call to xfs_itruncate_start()
- * or xfs_atruncate_start().
 *
- * The transaction passed to this routine must have made
+ * The transaction passed to this routine must have made a permanent log
- * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES.
+ * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
- * This routine may commit the given transaction and
+ * given transaction and start new ones, so make sure everything involved in
- * start new ones, so make sure everything involved in
+ * the transaction is tidy before calling here.  Some transaction will be
- * the transaction is tidy before calling here.
+ * returned to the caller to be committed.  The incoming transaction must
- * Some transaction will be returned to the caller to be
+ * already include the inode, and both inode locks must be held exclusively.
- * committed.  The incoming transaction must already include
+ * The inode must also be "held" within the transaction.  On return the inode
- * the inode, and both inode locks must be held exclusively.
+ * will be "held" within the returned transaction.  This routine does NOT
- * The inode must also be "held" within the transaction.  On
+ * require any disk space to be reserved for it within the transaction.
- * return the inode will be "held" within the returned transaction.
- * This routine does NOT require any disk space to be reserved
- * for it within the transaction.
 *
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork,
+ * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * and it indicates the fork which is to be truncated.  For the
+ * indicates the fork which is to be truncated.  For the attribute fork we only
- * attribute fork we only support truncation to size 0.
+ * support truncation to size 0.
 *
- * We use the sync parameter to indicate whether or not the first
+ * We use the sync parameter to indicate whether or not the first transaction
- * transaction we perform might have to be synchronous.  For the attr fork,
+ * we perform might have to be synchronous.  For the attr fork, it needs to be
- * it needs to be so if the unlink of the inode is not yet known to be
+ * so if the unlink of the inode is not yet known to be permanent in the log.
- * permanent in the log.  This keeps us from freeing and reusing the
+ * This keeps us from freeing and reusing the blocks of the attribute fork
- * blocks of the attribute fork before the unlink of the inode becomes
+ * before the unlink of the inode becomes permanent.
- * permanent.
 *
- * For the data fork, we normally have to run synchronously if we're
+ * For the data fork, we normally have to run synchronously if we're being
- * being called out of the inactive path or we're being called
+ * called out of the inactive path or we're being called out of the create path
- * out of the create path where we're truncating an existing file.
+ * where we're truncating an existing file.  Either way, the truncate needs to
- * Either way, the truncate needs to be sync so blocks don't reappear
+ * be sync so blocks don't reappear in the file with altered data in case of a
- * in the file with altered data in case of a crash.  wsync filesystems
+ * crash.  wsync filesystems can run the first case async because anything that
- * can run the first case async because anything that shrinks the inode
+ * shrinks the inode has to run sync so by the time we're called here from
- * has to run sync so by the time we're called here from inactive, the
+ * inactive, the inode size is permanently set to 0.
- * inode size is permanently set to 0.
 *
- * Calls from the truncate path always need to be sync unless we're
+ * Calls from the truncate path always need to be sync unless we're in a wsync
- * in a wsync filesystem and the file has already been unlinked.
+ * filesystem and the file has already been unlinked.
 *
- * The caller is responsible for correctly setting the sync parameter.
+ * The caller is responsible for correctly setting the sync parameter.  It gets
- * It gets too hard for us to guess here which path we're being called
+ * too hard for us to guess here which path we're being called out of just
- * out of just based on inode state.
+ * based on inode state.
+ *
+ * If we get an error, we must return with the inode locked and linked into the
+ * current transaction. This keeps things simple for the higher level code,
+ * because it always knows that the inode is locked and held in the transaction
+ * that returns to it whether errors occur or not.  We don't mark the inode
+ * dirty on error so that transactions can be easily aborted if possible.
 */
 int
 xfs_itruncate_finish(
@@ -1741,65 +1686,51 @@ xfs_itruncate_finish(
                 */
                error = xfs_bmap_finish(tp, &free_list, &committed);
                ntp = *tp;
+                if (committed) {
+                        /* link the inode into the next xact in the chain */
+                        xfs_trans_ijoin(ntp, ip,
+                                        XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+                        xfs_trans_ihold(ntp, ip);
+                }
                if (error) {
                        /*
-                         * If the bmap finish call encounters an error,
+                         * If the bmap finish call encounters an error, return
-                         * return to the caller where the transaction
+                         * to the caller where the transaction can be properly
-                         * can be properly aborted.  We just need to
+                         * aborted.  We just need to make sure we're not
-                         * make sure we're not holding any resources
+                         * holding any resources that we were not when we came
-                         * that we were not when we came in.
+                         * in.
                         *
-                         * Aborting from this point might lose some
+                         * Aborting from this point might lose some blocks in
-                         * blocks in the file system, but oh well.
+                         * the file system, but oh well.
                         */
                        xfs_bmap_cancel(&free_list);
-                        if (committed) {
-                                /*
-                                 * If the passed in transaction committed
-                                 * in xfs_bmap_finish(), then we want to
-                                 * add the inode to this one before returning.
-                                 * This keeps things simple for the higher
-                                 * level code, because it always knows that
-                                 * the inode is locked and held in the
-                                 * transaction that returns to it whether
-                                 * errors occur or not.  We don't mark the
-                                 * inode dirty so that this transaction can
-                                 * be easily aborted if possible.
-                                 */
-                                xfs_trans_ijoin(ntp, ip,
-                                        XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-                                xfs_trans_ihold(ntp, ip);
-                        }
                        return error;
                }
                if (committed) {
                        /*
-                         * The first xact was committed,
+                         * Mark the inode dirty so it will be logged and
-                         * so add the inode to the new one.
+                         * moved forward in the log as part of every commit.
-                         * Mark it dirty so it will be logged
-                         * and moved forward in the log as
-                         * part of every commit.
                         */
-                        xfs_trans_ijoin(ntp, ip,
-                                        XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-                        xfs_trans_ihold(ntp, ip);
                        xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
                }
                ntp = xfs_trans_dup(ntp);
-                (void) xfs_trans_commit(*tp, 0);
+                error = xfs_trans_commit(*tp, 0);
                *tp = ntp;
-                error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                          XFS_TRANS_PERM_LOG_RES,
+                /* link the inode into the next transaction in the chain */
-                                          XFS_ITRUNCATE_LOG_COUNT);
-                /*
-                 * Add the inode being truncated to the next chained
-                 * transaction.
-                 */
                xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
                xfs_trans_ihold(ntp, ip);
+                if (!error)
+                        error = xfs_trans_reserve(ntp, 0,
+                                        XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                        XFS_TRANS_PERM_LOG_RES,
+                                        XFS_ITRUNCATE_LOG_COUNT);
                if (error)
-                        return (error);
+                        return error;
        }
        /*
         * Only update the size in the case of the data fork, but
@@ -1967,7 +1898,7 @@ xfs_iunlink(
                 * Here we put the head pointer into our next pointer,
                 * and then we fall through to point the head at us.
                 */
-                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error)
                        return error;
@@ -2075,7 +2006,7 @@ xfs_iunlink_remove(
                 * of dealing with the buffer when there is no need to
                 * change it.
                 */
-                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error) {
                        cmn_err(CE_WARN,
                                "xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2137,7 +2068,7 @@ xfs_iunlink_remove(
                 * Now last_ibp points to the buffer previous to us on
                 * the unlinked list.  Pull us from the list.
                 */
-                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+                error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error) {
                        cmn_err(CE_WARN,
                                "xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2172,13 +2103,6 @@ xfs_iunlink_remove(
        return 0;
 }
-STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
-{
-        return (((ip->i_itemp == NULL) ||
-                !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-                (ip->i_update_core == 0));
-}
 STATIC void
 xfs_ifree_cluster(
        xfs_inode_t     *free_ip,
@@ -2400,7 +2324,7 @@ xfs_ifree(
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-        error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+        error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
        if (error)
                return error;
@@ -2678,14 +2602,31 @@ xfs_imap(
        fsbno = imap->im_blkno ?
                XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
        error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
-        if (error != 0) {
+        if (error)
                return error;
-        }
        imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
        imap->im_len = XFS_FSB_TO_BB(mp, len);
        imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
        imap->im_ioffset = (ushort)off;
        imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
+        /*
+         * If the inode number maps to a block outside the bounds
+         * of the file system then return EINVAL rather than calling
+         * read_buf and panicking when we get an error from the
+         * driver.
+         */
+        if ((imap->im_blkno + imap->im_len) >
+            XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+                xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+                        "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
+                        " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
+                        (unsigned long long) imap->im_blkno,
+                        (unsigned long long) imap->im_len,
+                        XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+                return EINVAL;
+        }
        return 0;
 }
@@ -2826,38 +2767,41 @@ xfs_iunpin(
 }
 /*
- * This is called to wait for the given inode to be unpinned.
+ * This is called to unpin an inode. It can be directed to wait or to return
- * It will sleep until this happens.  The caller must have the
+ * immediately without waiting for the inode to be unpinned.  The caller must
- * inode locked in at least shared mode so that the buffer cannot
+ * have the inode locked in at least shared mode so that the buffer cannot be
- * be subsequently pinned once someone is waiting for it to be
+ * subsequently pinned once someone is waiting for it to be unpinned.
- * unpinned.
 */
 STATIC void
-xfs_iunpin_wait(
+__xfs_iunpin_wait(
-        xfs_inode_t     *ip)
+        xfs_inode_t     *ip,
+        int             wait)
 {
-        xfs_inode_log_item_t    *iip;
+        xfs_inode_log_item_t    *iip = ip->i_itemp;
-        xfs_lsn_t       lsn;
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
+        if (atomic_read(&ip->i_pincount) == 0)
-        if (atomic_read(&ip->i_pincount) == 0) {
                return;
-        }
-        iip = ip->i_itemp;
+        /* Give the log a push to start the unpinning I/O */
-        if (iip && iip->ili_last_lsn) {
+        xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
-                lsn = iip->ili_last_lsn;
+                                iip->ili_last_lsn : 0, XFS_LOG_FORCE);
-        } else {
+        if (wait)
-                lsn = (xfs_lsn_t)0;
+                wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
-        }
+}
-        /*
+static inline void
-         * Give the log a push so we don't wait here too long.
+xfs_iunpin_wait(
-         */
+        xfs_inode_t     *ip)
-        xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
+{
+        __xfs_iunpin_wait(ip, 1);
+}
-        wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+static inline void
+xfs_iunpin_nowait(
+        xfs_inode_t     *ip)
+{
+        __xfs_iunpin_wait(ip, 0);
 }
@@ -2932,7 +2876,7 @@ xfs_iextents_copy(
 * format indicates the current state of the fork.
 */
 /*ARGSUSED*/
-STATIC int
+STATIC void
 xfs_iflush_fork(
        xfs_inode_t             *ip,
        xfs_dinode_t            *dip,
@@ -2953,16 +2897,16 @@ xfs_iflush_fork(
        static const short      extflag[2] =
                { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
-        if (iip == NULL)
+        if (!iip)
-                return 0;
+                return;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        /*
         * This can happen if we gave up in iformat in an error path,
         * for the attribute fork.
         */
-        if (ifp == NULL) {
+        if (!ifp) {
                ASSERT(whichfork == XFS_ATTR_FORK);
-                return 0;
+                return;
        }
        cp = XFS_DFORK_PTR(dip, whichfork);
        mp = ip->i_mount;
@@ -3023,8 +2967,145 @@ xfs_iflush_fork(
                ASSERT(0);
                break;
        }
+}
+/*
+ * Inode clustering helper for xfs_iflush(): try to gather other dirty
+ * in-core inodes that share ip's inode cluster into the write of bp.
+ *
+ * ip itself is skipped here; every other candidate is only taken if its
+ * inode lock (shared) and flush lock can be obtained without blocking and
+ * it is not pinned.  Candidates are flushed into bp via xfs_iflush_int().
+ *
+ * Returns 0 on success, which includes the cases where the lookup array
+ * could not be allocated or no neighbouring inode was found.  If
+ * xfs_iflush_int() fails for a neighbour, bp is released or completed with
+ * an error, the filesystem is shut down with SHUTDOWN_CORRUPT_INCORE, the
+ * neighbour's flush is aborted and XFS_ERROR(EFSCORRUPTED) is returned.
+ */
+STATIC int
+xfs_iflush_cluster(
+        xfs_inode_t     *ip,
+        xfs_buf_t       *bp)
+{
+        xfs_mount_t             *mp = ip->i_mount;
+        xfs_perag_t             *pag = xfs_get_perag(mp, ip->i_ino);
+        unsigned long           first_index, mask;
+        int                     ilist_size;
+        xfs_inode_t             **ilist;
+        xfs_inode_t             *iq;
+        int                     nr_found;
+        int                     clcount = 0;
+        int                     bufwasdelwri;
+        int                     i;
+        ASSERT(pag->pagi_inodeok);
+        ASSERT(pag->pag_ici_init);
+        /*
+         * The array has one pointer slot for each of the at most
+         * XFS_INODE_CLUSTER_SIZE(mp) results requested from the gang lookup
+         * below.  The allocation may fail (KM_MAYFAIL); in that case
+         * clustering is simply skipped and success is reported.
+         */
+        ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+        ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+        if (!ilist)
+                return 0;
+        /*
+         * NOTE(review): the shift by sb_inodelog presumably converts the
+         * cluster size to a number of inodes, making first_index the AG
+         * inode number of the first inode in ip's cluster -- confirm.
+         */
+        mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+        first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+        read_lock(&pag->pag_ici_lock);
+        /* really need a gang lookup range call here */
+        nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+                                        first_index,
+                                        XFS_INODE_CLUSTER_SIZE(mp));
+        if (nr_found == 0)
+                goto out_free;
+        for (i = 0; i < nr_found; i++) {
+                iq = ilist[i];
+                if (iq == ip)
+                        continue;
+                /* if the inode lies outside this cluster, we're done. */
+                if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+                        break;
+                /*
+                 * Do an un-protected check to see if the inode is dirty and
+                 * is a candidate for flushing.  These checks will be repeated
+                 * later after the appropriate locks are acquired.
+                 */
+                if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+                        continue;
+                /*
+                 * Try to get locks.  If any are unavailable or it is pinned,
+                 * then this inode cannot be flushed and is skipped.
+                 */
+                if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+                        continue;
+                if (!xfs_iflock_nowait(iq)) {
+                        xfs_iunlock(iq, XFS_ILOCK_SHARED);
+                        continue;
+                }
+                if (xfs_ipincount(iq)) {
+                        xfs_ifunlock(iq);
+                        xfs_iunlock(iq, XFS_ILOCK_SHARED);
+                        continue;
+                }
+                /*
+                 * arriving here means that this inode can be flushed.  First
+                 * re-check that it's dirty before flushing.
+                 */
+                if (!xfs_inode_clean(iq)) {
+                        int     error;
+                        error = xfs_iflush_int(iq, bp);
+                        if (error) {
+                                /*
+                                 * Only the inode lock is dropped here; the
+                                 * flush lock on iq is left for the
+                                 * xfs_iflush_abort(iq) call at
+                                 * cluster_corrupt_out to release.
+                                 */
+                                xfs_iunlock(iq, XFS_ILOCK_SHARED);
+                                goto cluster_corrupt_out;
+                        }
+                        clcount++;
+                } else {
+                        xfs_ifunlock(iq);
+                }
+                xfs_iunlock(iq, XFS_ILOCK_SHARED);
+        }
+        /* Account for the neighbours (not ip) written into this buffer. */
+        if (clcount) {
+                XFS_STATS_INC(xs_icluster_flushcnt);
+                XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+        }
+out_free:
+        read_unlock(&pag->pag_ici_lock);
+        kmem_free(ilist, ilist_size);
        return 0;
+cluster_corrupt_out:
+        /*
+         * Corruption detected in the clustering loop.  Invalidate the
+         * inode buffer and shut down the filesystem.
+         */
+        read_unlock(&pag->pag_ici_lock);
+        /*
+         * Clean up the buffer.  If it was B_DELWRI, just release it --
+         * brelse can handle it with no problems.  If not, shut down the
+         * filesystem before releasing the buffer.
+         */
+        bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+        if (bufwasdelwri)
+                xfs_buf_relse(bp);
+        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+        if (!bufwasdelwri) {
+                /*
+                 * Just like incore_relse: if we have b_iodone functions,
+                 * mark the buffer as an error and call them.  Otherwise
+                 * mark it as stale and brelse.
+                 */
+                if (XFS_BUF_IODONE_FUNC(bp)) {
+                        XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+                        XFS_BUF_UNDONE(bp);
+                        XFS_BUF_STALE(bp);
+                        XFS_BUF_SHUT(bp);
+                        XFS_BUF_ERROR(bp,EIO);
+                        xfs_biodone(bp);
+                } else {
+                        XFS_BUF_STALE(bp);
+                        xfs_buf_relse(bp);
+                }
+        }
+        /*
+         * Unlocks the flush lock
+         * (iq is the neighbouring inode whose xfs_iflush_int() call failed,
+         * not the inode the caller passed in.)
+         */
+        xfs_iflush_abort(iq);
+        kmem_free(ilist, ilist_size);
+        return XFS_ERROR(EFSCORRUPTED);
 }
 /*
@@ -3046,11 +3127,7 @@ xfs_iflush(
        xfs_dinode_t            *dip;
        xfs_mount_t             *mp;
        int                     error;
-        /* REFERENCED */
+        int                     noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
-        xfs_inode_t             *iq;
-        int                     clcount;        /* count of inodes clustered */
-        int                     bufwasdelwri;
-        struct hlist_node       *entry;
        enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
        XFS_STATS_INC(xs_iflush_count);
@@ -3067,8 +3144,7 @@ xfs_iflush(
         * If the inode isn't dirty, then just release the inode
         * flush lock and do nothing.
         */
-        if ((ip->i_update_core == 0) &&
+        if (xfs_inode_clean(ip)) {
-            ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
                ASSERT((iip != NULL) ?
                         !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
                xfs_ifunlock(ip);
@@ -3076,11 +3152,21 @@ xfs_iflush(
        }
        /*
-         * We can't flush the inode until it is unpinned, so
+         * We can't flush the inode until it is unpinned, so wait for it if we
-         * wait for it.  We know noone new can pin it, because
+         * are allowed to block.  We know no one new can pin it, because we are
-         * we are holding the inode lock shared and you need
+         * holding the inode lock shared and you need to hold it exclusively to
-         * to hold it exclusively to pin the inode.
+         * pin the inode.
+         *
+         * If we are not allowed to block, force the log out asynchronously so
+         * that when we come back the inode will be unpinned. If other inodes
+         * in the same cluster are dirty, they will probably write the inode
+         * out for us if they occur after the log force completes.
         */
+        if (noblock && xfs_ipincount(ip)) {
+                xfs_iunpin_nowait(ip);
+                xfs_ifunlock(ip);
+                return EAGAIN;
+        }
        xfs_iunpin_wait(ip);
        /*
@@ -3097,15 +3183,6 @@ xfs_iflush(
        }
        /*
-         * Get the buffer containing the on-disk inode.
-         */
-        error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
-        if (error) {
-                xfs_ifunlock(ip);
-                return error;
-        }
-        /*
         * Decide how buffer will be flushed out.  This is done before
         * the call to xfs_iflush_int because this field is zeroed by it.
         */
@@ -3121,6 +3198,7 @@ xfs_iflush(
                case XFS_IFLUSH_DELWRI_ELSE_SYNC:
                        flags = 0;
                        break;
+                case XFS_IFLUSH_ASYNC_NOBLOCK:
                case XFS_IFLUSH_ASYNC:
                case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
                        flags = INT_ASYNC;
@@ -3140,6 +3218,7 @@ xfs_iflush(
                case XFS_IFLUSH_DELWRI:
                        flags = INT_DELWRI;
                        break;
+                case XFS_IFLUSH_ASYNC_NOBLOCK:
                case XFS_IFLUSH_ASYNC:
                        flags = INT_ASYNC;
                        break;
@@ -3154,94 +3233,41 @@ xfs_iflush(
        }
        /*
-         * First flush out the inode that xfs_iflush was called with.
+         * Get the buffer containing the on-disk inode.
         */
-        error = xfs_iflush_int(ip, bp);
+        error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
-        if (error) {
+                                noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
-                goto corrupt_out;
+        if (error || !bp) {
+                xfs_ifunlock(ip);
+                return error;
        }
        /*
-         * inode clustering:
+         * First flush out the inode that xfs_iflush was called with.
-         * see if other inodes can be gathered into this write
         */
-        spin_lock(&ip->i_cluster->icl_lock);
+        error = xfs_iflush_int(ip, bp);
-        ip->i_cluster->icl_buf = bp;
+        if (error)
+                goto corrupt_out;
-        clcount = 0;
-        hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
-                if (iq == ip)
-                        continue;
-                /*
-                 * Do an un-protected check to see if the inode is dirty and
-                 * is a candidate for flushing.  These checks will be repeated
-                 * later after the appropriate locks are acquired.
-                 */
-                iip = iq->i_itemp;
-                if ((iq->i_update_core == 0) &&
-                    ((iip == NULL) ||
-                     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-                      xfs_ipincount(iq) == 0) {
-                        continue;
-                }
-                /*
-                 * Try to get locks.  If any are unavailable,
-                 * then this inode cannot be flushed and is skipped.
-                 */
-                /* get inode locks (just i_lock) */
-                if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
-                        /* get inode flush lock */
-                        if (xfs_iflock_nowait(iq)) {
-                                /* check if pinned */
-                                if (xfs_ipincount(iq) == 0) {
-                                        /* arriving here means that
-                                         * this inode can be flushed.
-                                         * first re-check that it's
-                                         * dirty
-                                         */
-                                        iip = iq->i_itemp;
-                                        if ((iq->i_update_core != 0)||
-                                            ((iip != NULL) &&
-                                             (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
-                                                clcount++;
-                                                error = xfs_iflush_int(iq, bp);
-                                                if (error) {
-                                                        xfs_iunlock(iq,
-                                                                    XFS_ILOCK_SHARED);
-                                                        goto cluster_corrupt_out;
-                                                }
-                                        } else {
-                                                xfs_ifunlock(iq);
-                                        }
-                                } else {
-                                        xfs_ifunlock(iq);
-                                }
-                        }
-                        xfs_iunlock(iq, XFS_ILOCK_SHARED);
-                }
-        }
-        spin_unlock(&ip->i_cluster->icl_lock);
-        if (clcount) {
-                XFS_STATS_INC(xs_icluster_flushcnt);
-                XFS_STATS_ADD(xs_icluster_flushinode, clcount);
-        }
        /*
-         * If the buffer is pinned then push on the log so we won't
+         * If the buffer is pinned then push on the log now so we won't
         * get stuck waiting in the write for too long.
         */
-        if (XFS_BUF_ISPINNED(bp)){
+        if (XFS_BUF_ISPINNED(bp))
                xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-        }
+        /*
+         * inode clustering:
+         * see if other inodes can be gathered into this write
+         */
+        error = xfs_iflush_cluster(ip, bp);
+        if (error)
+                goto cluster_corrupt_out;
        if (flags & INT_DELWRI) {
                xfs_bdwrite(mp, bp);
        } else if (flags & INT_ASYNC) {
-                xfs_bawrite(mp, bp);
+                error = xfs_bawrite(mp, bp);
        } else {
                error = xfs_bwrite(mp, bp);
        }
@@ -3250,52 +3276,11 @@ xfs_iflush(
 corrupt_out:
        xfs_buf_relse(bp);
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-        xfs_iflush_abort(ip);
-        /*
-         * Unlocks the flush lock
-         */
-        return XFS_ERROR(EFSCORRUPTED);
 cluster_corrupt_out:
-        /* Corruption detected in the clustering loop.  Invalidate the
-         * inode buffer and shut down the filesystem.
-         */
-        spin_unlock(&ip->i_cluster->icl_lock);
-        /*
-         * Clean up the buffer.  If it was B_DELWRI, just release it --
-         * brelse can handle it with no problems.  If not, shut down the
-         * filesystem before releasing the buffer.
-         */
-        if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
-                xfs_buf_relse(bp);
-        }
-        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-        if(!bufwasdelwri)  {
-                /*
-                 * Just like incore_relse: if we have b_iodone functions,
-                 * mark the buffer as an error and call them.  Otherwise
-                 * mark it as stale and brelse.
-                 */
-                if (XFS_BUF_IODONE_FUNC(bp)) {
-                        XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-                        XFS_BUF_UNDONE(bp);
-                        XFS_BUF_STALE(bp);
-                        XFS_BUF_SHUT(bp);
-                        XFS_BUF_ERROR(bp,EIO);
-                        xfs_biodone(bp);
-                } else {
-                        XFS_BUF_STALE(bp);
-                        xfs_buf_relse(bp);
-                }
-        }
-        xfs_iflush_abort(iq);
        /*
         * Unlocks the flush lock
         */
+        xfs_iflush_abort(ip);
        return XFS_ERROR(EFSCORRUPTED);
 }
@@ -3325,8 +3310,7 @@ xfs_iflush_int(
         * If the inode isn't dirty, then just release the inode
         * flush lock and do nothing.
         */
-        if ((ip->i_update_core == 0) &&
+        if (xfs_inode_clean(ip)) {
-            ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
                xfs_ifunlock(ip);
                return 0;
        }
@@ -3459,16 +3443,9 @@ xfs_iflush_int(
                }
        }
-        if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) {
+        xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
-                goto corrupt_out;
+        if (XFS_IFORK_Q(ip))
-        }
+                xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
-        if (XFS_IFORK_Q(ip)) {
-                /*
-                 * The only error from xfs_iflush_fork is on the data fork.
-                 */
-                (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
-        }
        xfs_inobp_check(mp, bp);
        /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bfcd72cbaeea..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
 } dm_attrs_t;
 /*
- * This is the xfs inode cluster structure.  This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
-        struct hlist_head       icl_inodes;     /* list of inodes on cluster */
-        xfs_daddr_t             icl_blkno;      /* starting block number of
-                                                 * the cluster */
-        struct xfs_buf          *icl_buf;       /* the inode buffer */
-        spinlock_t              icl_lock;       /* inode list lock */
-} xfs_icluster_t;
-/*
 * This is the xfs in-core inode structure.
 * Most of the on-disk inode is embedded in the i_d field.
 *
@@ -240,10 +227,6 @@ typedef struct xfs_inode {
        atomic_t                i_pincount;     /* inode pin count */
        wait_queue_head_t       i_ipin_wait;    /* inode pinning wait queue */
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
-#ifdef HAVE_REFCACHE
-        struct xfs_inode        **i_refcache;   /* ptr to entry in ref cache */
-        struct xfs_inode        *i_release;     /* inode to unref */
-#endif
        /* Miscellaneous state. */
        unsigned short          i_flags;        /* see defined flags below */
        unsigned char           i_update_core;  /* timestamps/size is dirty */
@@ -252,8 +235,6 @@ typedef struct xfs_inode {
        unsigned int            i_delayed_blks; /* count of delay alloc blks */
        xfs_icdinode_t          i_d;            /* most of ondisk inode */
-        xfs_icluster_t          *i_cluster;     /* cluster list header */
-        struct hlist_node       i_cnode;        /* cluster link node */
        xfs_fsize_t             i_size;         /* in-memory size */
        xfs_fsize_t             i_new_size;     /* size when write completes */
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 #define XFS_IFLUSH_SYNC                 3
 #define XFS_IFLUSH_ASYNC                4
 #define XFS_IFLUSH_DELWRI               5
+#define XFS_IFLUSH_ASYNC_NOBLOCK        6
 /*
 * Flags for xfs_itruncate_start().
@@ -515,7 +497,7 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
 */
 int             xfs_itobp(struct xfs_mount *, struct xfs_trans *,
                          xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-                          xfs_daddr_t, uint);
+                          xfs_daddr_t, uint, uint);
 int             xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
                          xfs_inode_t **, xfs_daddr_t, uint);
 int             xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
@@ -597,7 +579,6 @@ void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
-extern struct kmem_zone *xfs_icluster_zone;
 extern struct kmem_zone *xfs_ifork_zone;
 extern struct kmem_zone *xfs_inode_zone;
 extern struct kmem_zone *xfs_ili_zone;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2c775b4ae9e6..93b5db453ea2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -40,6 +40,7 @@
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_rw.h"
+#include "xfs_error.h"
 kmem_zone_t     *xfs_ili_zone;          /* inode log item zone */
@@ -813,7 +814,12 @@ xfs_inode_item_pushbuf(
                                              XFS_LOG_FORCE);
                        }
                        if (dopush) {
-                                xfs_bawrite(mp, bp);
+                                int     error;
+                                error = xfs_bawrite(mp, bp);
+                                if (error)
+                                        xfs_fs_cmn_err(CE_WARN, mp,
+                "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
+                                                        error, iip, bp);
                        } else {
                                xfs_buf_relse(bp);
                        }
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bfe92ea17952..40513077ab36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w)
        return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
 }
+/*
+ * Return non-zero when the in-core inode has nothing that needs flushing:
+ * it either has no inode log item or that item has none of the XFS_ILOG_ALL
+ * bits set in ili_format.ilf_fields, and i_update_core is clear.
+ */
+static inline int xfs_inode_clean(xfs_inode_t *ip)
+{
+        return (!ip->i_itemp ||
+                !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+               !ip->i_update_core;
+}
 #ifdef __KERNEL__
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fde37f87d52f..fb3cf1191419 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -802,8 +802,11 @@ xfs_iomap_write_allocate(
                         */
                        nimaps = 1;
                        end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
-                        xfs_bmap_last_offset(NULL, ip, &last_block,
+                        error = xfs_bmap_last_offset(NULL, ip, &last_block,
-                                XFS_DATA_FORK);
+                                                        XFS_DATA_FORK);
+                        if (error)
+                                goto trans_cancel;
                        last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
                        if ((map_start_fsb + count_fsb) > last_block) {
                                count_fsb = last_block - map_start_fsb;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f615e04364f4..eb85bdedad0c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -129,7 +129,7 @@ xfs_bulkstat_one_iget(
        return error;
 }
-STATIC int
+STATIC void
 xfs_bulkstat_one_dinode(
        xfs_mount_t     *mp,            /* mount point for filesystem */
        xfs_ino_t       ino,            /* inode number to get data for */
@@ -198,8 +198,6 @@ xfs_bulkstat_one_dinode(
                buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
                break;
        }
-        return 0;
 }
 STATIC int
@@ -614,7 +612,8 @@ xfs_bulkstat(
                                                        xfs_buf_relse(bp);
                                                error = xfs_itobp(mp, NULL, ip,
                                                                &dip, &bp, bno,
-                                                                XFS_IMAP_BULKSTAT);
+                                                                XFS_IMAP_BULKSTAT,
+                                                                XFS_BUF_LOCK);
                                                if (!error)
                                                        clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
                                                kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 31f2b04f2c97..afaee301b0ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,7 @@
 #include "xfs_inode.h"
 #include "xfs_rw.h"
+kmem_zone_t     *xfs_log_ticket_zone;
 #define xlog_write_adv_cnt(ptr, len, off, bytes) \
        { (ptr) += (bytes); \
@@ -73,8 +74,6 @@ STATIC int  xlog_state_get_iclog_space(xlog_t		*log,
                                       xlog_ticket_t    *ticket,
                                       int              *continued_write,
                                       int              *logoffsetp);
-STATIC void xlog_state_put_ticket(xlog_t        *log,
-                                  xlog_ticket_t *tic);
 STATIC int  xlog_state_release_iclog(xlog_t             *log,
                                     xlog_in_core_t     *iclog);
 STATIC void xlog_state_switch_iclogs(xlog_t             *log,
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t	 *log,
 /* local ticket functions */
-STATIC void             xlog_state_ticket_alloc(xlog_t *log);
 STATIC xlog_ticket_t    *xlog_ticket_get(xlog_t *log,
                                         int    unit_bytes,
                                         int    count,
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t	*mp,
                 */
                xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
                xlog_ungrant_log_space(log, ticket);
-                xlog_state_put_ticket(log, ticket);
+                xlog_ticket_put(log, ticket);
        } else {
                xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
                xlog_regrant_reserve_log_space(log, ticket);
@@ -384,7 +382,27 @@ _xfs_log_force(
                return xlog_state_sync_all(log, flags, log_flushed);
        else
                return xlog_state_sync(log, lsn, flags, log_flushed);
-}       /* xfs_log_force */
+}       /* _xfs_log_force */
+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ *
+ * NULL is passed for the final argument of _xfs_log_force(), and any error
+ * it returns is only reported through a CE_WARN message; nothing is
+ * propagated back to the caller.
+ */
+void
+xfs_log_force(
+        xfs_mount_t     *mp,
+        xfs_lsn_t       lsn,
+        uint            flags)
+{
+        int     error;
+        error = _xfs_log_force(mp, lsn, flags, NULL);
+        if (error) {
+                xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+                        "error %d returned.", error);
+        }
+}
 /*
 * Attaches a new iclog I/O completion callback routine during
@@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mount of partition */
               void               *iclog_hndl,  /* iclog to hang callback off */
               xfs_log_callback_t *cb)
 {
-        xlog_t *log = mp->m_log;
        xlog_in_core_t    *iclog = (xlog_in_core_t *)iclog_hndl;
        int     abortflg;
-        cb->cb_next = NULL;
+        spin_lock(&iclog->ic_callback_lock);
-        spin_lock(&log->l_icloglock);
        abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
        if (!abortflg) {
                ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mount of partition */
                *(iclog->ic_callback_tail) = cb;
                iclog->ic_callback_tail = &(cb->cb_next);
        }
-        spin_unlock(&log->l_icloglock);
+        spin_unlock(&iclog->ic_callback_lock);
        return abortflg;
 }       /* xfs_log_notify */
@@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t	 *mp,
                /* may sleep if need to allocate more tickets */
                internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
                                                  client, flags);
+                if (!internal_ticket)
+                        return XFS_ERROR(ENOMEM);
                internal_ticket->t_trans_type = t_type;
                *ticket = internal_ticket;
                xlog_trace_loggrant(log, internal_ticket, 
@@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
        if (mp->m_flags & XFS_MOUNT_RDONLY)
                return 0;
-        xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+        error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+        ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 #ifdef DEBUG
        first_iclog = iclog = log->l_iclog;
@@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
                spin_lock(&log->l_icloglock);
                iclog = log->l_iclog;
-                iclog->ic_refcnt++;
+                atomic_inc(&iclog->ic_refcnt);
                spin_unlock(&log->l_icloglock);
                xlog_state_want_sync(log, iclog);
-                (void) xlog_state_release_iclog(log, iclog);
+                error = xlog_state_release_iclog(log, iclog);
                spin_lock(&log->l_icloglock);
                if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
@@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
                if (tic) {
                        xlog_trace_loggrant(log, tic, "unmount rec");
                        xlog_ungrant_log_space(log, tic);
-                        xlog_state_put_ticket(log, tic);
+                        xlog_ticket_put(log, tic);
                }
        } else {
                /*
@@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
                 */
                spin_lock(&log->l_icloglock);
                iclog = log->l_iclog;
-                iclog->ic_refcnt++;
+                atomic_inc(&iclog->ic_refcnt);
                spin_unlock(&log->l_icloglock);
                xlog_state_want_sync(log, iclog);
-                (void) xlog_state_release_iclog(log, iclog);
+                error =  xlog_state_release_iclog(log, iclog);
                spin_lock(&log->l_icloglock);
@@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
                }
        }
-        return 0;
+        return error;
 }       /* xfs_log_unmount_write */
 /*
@@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
        spin_lock_init(&log->l_icloglock);
        spin_lock_init(&log->l_grant_lock);
        initnsema(&log->l_flushsema, 0, "ic-flush");
-        xlog_state_ticket_alloc(log);  /* wait until after icloglock inited */
        /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
        ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1240,9 +1258,9 @@ xlog_alloc_log(xfs_mount_t	*mp,
                XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
                iclog->ic_bp = bp;
                iclog->hic_data = bp->b_addr;
+#ifdef DEBUG
                log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
+#endif
                head = &iclog->ic_header;
                memset(head, 0, sizeof(xlog_rec_header_t));
                head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
@@ -1253,10 +1271,11 @@ xlog_alloc_log(xfs_mount_t	*mp,
                head->h_fmt = cpu_to_be32(XLOG_FMT);
                memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
                iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
                iclog->ic_state = XLOG_STATE_ACTIVE;
                iclog->ic_log = log;
+                atomic_set(&iclog->ic_refcnt, 0);
+                spin_lock_init(&iclog->ic_callback_lock);
                iclog->ic_callback_tail = &(iclog->ic_callback);
                iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
@@ -1405,7 +1424,7 @@ xlog_sync(xlog_t		*log,
        int             v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
        XFS_STATS_INC(xs_log_writes);
-        ASSERT(iclog->ic_refcnt == 0);
+        ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
        /* Add for LR header */
        count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -1538,7 +1557,6 @@ STATIC void
 xlog_dealloc_log(xlog_t *log)
 {
        xlog_in_core_t  *iclog, *next_iclog;
-        xlog_ticket_t   *tic, *next_tic;
        int             i;
        iclog = log->l_iclog;
@@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
        spinlock_destroy(&log->l_icloglock);
        spinlock_destroy(&log->l_grant_lock);
-        /* XXXsup take a look at this again. */
-        if ((log->l_ticket_cnt != log->l_ticket_tcnt)  &&
-            !XLOG_FORCED_SHUTDOWN(log)) {
-                xfs_fs_cmn_err(CE_WARN, log->l_mp,
-                        "xlog_dealloc_log: (cnt: %d, total: %d)",
-                        log->l_ticket_cnt, log->l_ticket_tcnt);
-                /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
-        } else {
-                tic = log->l_unmount_free;
-                while (tic) {
-                        next_tic = tic->t_next;
-                        kmem_free(tic, PAGE_SIZE);
-                        tic = next_tic;
-                }
-        }
        xfs_buf_free(log->l_xbuf);
 #ifdef XFS_LOG_TRACE
        if (log->l_trace != NULL) {
@@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log)
                if (iclog->ic_state == XLOG_STATE_DIRTY) {
                        iclog->ic_state = XLOG_STATE_ACTIVE;
                        iclog->ic_offset       = 0;
-                        iclog->ic_callback      = NULL;   /* don't need to free */
+                        ASSERT(iclog->ic_callback == NULL);
                        /*
                         * If the number of ops in this iclog indicate it just
                         * contains the dummy transaction, we can
@@ -2190,37 +2192,40 @@ xlog_state_do_callback(
                                        be64_to_cpu(iclog->ic_header.h_lsn);
                                spin_unlock(&log->l_grant_lock);
-                                /*
-                                 * Keep processing entries in the callback list
-                                 * until we come around and it is empty.  We
-                                 * need to atomically see that the list is
-                                 * empty and change the state to DIRTY so that
-                                 * we don't miss any more callbacks being added.
-                                 */
-                                spin_lock(&log->l_icloglock);
                        } else {
+                                spin_unlock(&log->l_icloglock);
                                ioerrors++;
                        }
-                        cb = iclog->ic_callback;
+                        /*
+                         * Keep processing entries in the callback list until
+                         * we come around and it is empty.  We need to
+                         * atomically see that the list is empty and change the
+                         * state to DIRTY so that we don't miss any more
+                         * callbacks being added.
+                         */
+                        spin_lock(&iclog->ic_callback_lock);
+                        cb = iclog->ic_callback;
                        while (cb) {
                                iclog->ic_callback_tail = &(iclog->ic_callback);
                                iclog->ic_callback = NULL;
-                                spin_unlock(&log->l_icloglock);
+                                spin_unlock(&iclog->ic_callback_lock);
                                /* perform callbacks in the order given */
                                for (; cb; cb = cb_next) {
                                        cb_next = cb->cb_next;
                                        cb->cb_func(cb->cb_arg, aborted);
                                }
-                                spin_lock(&log->l_icloglock);
+                                spin_lock(&iclog->ic_callback_lock);
                                cb = iclog->ic_callback;
                        }
                        loopdidcallbacks++;
                        funcdidcallbacks++;
+                        spin_lock(&log->l_icloglock);
                        ASSERT(iclog->ic_callback == NULL);
+                        spin_unlock(&iclog->ic_callback_lock);
                        if (!(iclog->ic_state & XLOG_STATE_IOERROR))
                                iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2241,7 +2246,7 @@ xlog_state_do_callback(
                        repeats = 0;
                        xfs_fs_cmn_err(CE_WARN, log->l_mp,
                                "%s: possible infinite loop (%d iterations)",
-                                __FUNCTION__, flushcnt);
+                                __func__, flushcnt);
                }
        } while (!ioerrors && loopdidcallbacks);
@@ -2309,7 +2314,7 @@ xlog_state_done_syncing(
        ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
               iclog->ic_state == XLOG_STATE_IOERROR);
-        ASSERT(iclog->ic_refcnt == 0);
+        ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
        ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
@@ -2391,7 +2396,7 @@ restart:
        ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
        head = &iclog->ic_header;
-        iclog->ic_refcnt++;                     /* prevents sync */
+        atomic_inc(&iclog->ic_refcnt);  /* prevents sync */
        log_offset = iclog->ic_offset;
        /* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2428,12 @@ restart:
                xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
                /* If I'm the only one writing to this iclog, sync it to disk */
-                if (iclog->ic_refcnt == 1) {
+                if (atomic_read(&iclog->ic_refcnt) == 1) {
                        spin_unlock(&log->l_icloglock);
                        if ((error = xlog_state_release_iclog(log, iclog)))
                                return error;
                } else {
-                        iclog->ic_refcnt--;
+                        atomic_dec(&iclog->ic_refcnt);
                        spin_unlock(&log->l_icloglock);
                }
                goto restart;
@@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t	     *log,
 /*
- * Atomically put back used ticket.
- */
-STATIC void
-xlog_state_put_ticket(xlog_t        *log,
-                      xlog_ticket_t *tic)
-{
-        spin_lock(&log->l_icloglock);
-        xlog_ticket_put(log, tic);
-        spin_unlock(&log->l_icloglock);
-}       /* xlog_state_put_ticket */
-/*
 * Flush iclog to disk if this is the last reference to the given iclog and
 * the WANT_SYNC bit is set.
 *
@@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t	    *log,
 *
 */
 STATIC int
-xlog_state_release_iclog(xlog_t         *log,
+xlog_state_release_iclog(
-                         xlog_in_core_t *iclog)
+        xlog_t          *log,
+        xlog_in_core_t  *iclog)
 {
        int             sync = 0;       /* do we sync? */
-        xlog_assign_tail_lsn(log->l_mp);
+        if (iclog->ic_state & XLOG_STATE_IOERROR)
+                return XFS_ERROR(EIO);
-        spin_lock(&log->l_icloglock);
+        ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+        if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+                return 0;
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
                spin_unlock(&log->l_icloglock);
                return XFS_ERROR(EIO);
        }
-        ASSERT(iclog->ic_refcnt > 0);
        ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
               iclog->ic_state == XLOG_STATE_WANT_SYNC);
-        if (--iclog->ic_refcnt == 0 &&
+        if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
-            iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+                /* update tail before writing to iclog */
+                xlog_assign_tail_lsn(log->l_mp);
                sync++;
                iclog->ic_state = XLOG_STATE_SYNCING;
                iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
                xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
                /* cycle incremented when incrementing curr_block */
        }
        spin_unlock(&log->l_icloglock);
        /*
@@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t		*log,
         * this iclog has consistent data, so we ignore IOERROR
         * flags after this point.
         */
-        if (sync) {
+        if (sync)
                return xlog_sync(log, iclog);
-        }
        return 0;
 }       /* xlog_state_release_iclog */
@@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
                 * previous iclog and go to sleep.
                 */
                if (iclog->ic_state == XLOG_STATE_DIRTY ||
-                    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+                    (atomic_read(&iclog->ic_refcnt) == 0
+                     && iclog->ic_offset == 0)) {
                        iclog = iclog->ic_prev;
                        if (iclog->ic_state == XLOG_STATE_ACTIVE ||
                            iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
                        else
                                goto maybe_sleep;
                } else {
-                        if (iclog->ic_refcnt == 0) {
+                        if (atomic_read(&iclog->ic_refcnt) == 0) {
                                /* We are the only one with access to this
                                 * iclog.  Flush it out now.  There should
                                 * be a roundoff of zero to show that someone
                                 * has already taken care of the roundoff from
                                 * the previous sync.
                                 */
-                                iclog->ic_refcnt++;
+                                atomic_inc(&iclog->ic_refcnt);
                                lsn = be64_to_cpu(iclog->ic_header.h_lsn);
                                xlog_state_switch_iclogs(log, iclog, 0);
                                spin_unlock(&log->l_icloglock);
@@ -3100,7 +3094,7 @@ try_again:
                        already_slept = 1;
                        goto try_again;
                } else {
-                        iclog->ic_refcnt++;
+                        atomic_inc(&iclog->ic_refcnt);
                        xlog_state_switch_iclogs(log, iclog, 0);
                        spin_unlock(&log->l_icloglock);
                        if (xlog_state_release_iclog(log, iclog))
@@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
 */
 /*
- *      Algorithm doesn't take into account page size. ;-(
+ * Free a used ticket.
- */
-STATIC void
-xlog_state_ticket_alloc(xlog_t *log)
-{
-        xlog_ticket_t   *t_list;
-        xlog_ticket_t   *next;
-        xfs_caddr_t     buf;
-        uint            i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2;
-        /*
-         * The kmem_zalloc may sleep, so we shouldn't be holding the
-         * global lock.  XXXmiken: may want to use zone allocator.
-         */
-        buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP);
-        spin_lock(&log->l_icloglock);
-        /* Attach 1st ticket to Q, so we can keep track of allocated memory */
-        t_list = (xlog_ticket_t *)buf;
-        t_list->t_next = log->l_unmount_free;
-        log->l_unmount_free = t_list++;
-        log->l_ticket_cnt++;
-        log->l_ticket_tcnt++;
-        /* Next ticket becomes first ticket attached to ticket free list */
-        if (log->l_freelist != NULL) {
-                ASSERT(log->l_tail != NULL);
-                log->l_tail->t_next = t_list;
-        } else {
-                log->l_freelist = t_list;
-        }
-        log->l_ticket_cnt++;
-        log->l_ticket_tcnt++;
-        /* Cycle through rest of alloc'ed memory, building up free Q */
-        for ( ; i > 0; i--) {
-                next = t_list + 1;
-                t_list->t_next = next;
-                t_list = next;
-                log->l_ticket_cnt++;
-                log->l_ticket_tcnt++;
-        }
-        t_list->t_next = NULL;
-        log->l_tail = t_list;
-        spin_unlock(&log->l_icloglock);
-}       /* xlog_state_ticket_alloc */
-/*
- * Put ticket into free list
- *
- * Assumption: log lock is held around this call.
 */
 STATIC void
 xlog_ticket_put(xlog_t          *log,
                xlog_ticket_t   *ticket)
 {
        sv_destroy(&ticket->t_sema);
+        kmem_zone_free(xfs_log_ticket_zone, ticket);
-        /*
-         * Don't think caching will make that much difference.  It's
-         * more important to make debug easier.
-         */
-#if 0
-        /* real code will want to use LIFO for caching */
-        ticket->t_next = log->l_freelist;
-        log->l_freelist = ticket;
-        /* no need to clear fields */
-#else
-        /* When we debug, it is easier if tickets are cycled */
-        ticket->t_next     = NULL;
-        if (log->l_tail) {
-                log->l_tail->t_next = ticket;
-        } else {
-                ASSERT(log->l_freelist == NULL);
-                log->l_freelist = ticket;
-        }
-        log->l_tail         = ticket;
-#endif /* DEBUG */
-        log->l_ticket_cnt++;
 }       /* xlog_ticket_put */
 /*
- * Grab ticket off freelist or allocation some more
+ * Allocate and initialise a new log ticket.
 */
 STATIC xlog_ticket_t *
 xlog_ticket_get(xlog_t          *log,
@@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t		*log,
        xlog_ticket_t   *tic;
        uint            num_headers;
- alloc:
+        tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
-        if (log->l_freelist == NULL)
+        if (!tic)
-                xlog_state_ticket_alloc(log);           /* potentially sleep */
+                return NULL;
-        spin_lock(&log->l_icloglock);
-        if (log->l_freelist == NULL) {
-                spin_unlock(&log->l_icloglock);
-                goto alloc;
-        }
-        tic             = log->l_freelist;
-        log->l_freelist = tic->t_next;
-        if (log->l_freelist == NULL)
-                log->l_tail = NULL;
-        log->l_ticket_cnt--;
-        spin_unlock(&log->l_icloglock);
        /*
         * Permanent reservations have up to 'cnt'-1 active log operations
@@ -3611,8 +3520,8 @@ xfs_log_force_umount(
         * before we mark the filesystem SHUTDOWN and wake
         * everybody up to tell the bad news.
         */
-        spin_lock(&log->l_grant_lock);
        spin_lock(&log->l_icloglock);
+        spin_lock(&log->l_grant_lock);
        mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
        XFS_BUF_DONE(mp->m_sb_bp);
        /*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 4cdac048df5e..d1d678ecb63e 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -142,8 +142,9 @@ int	  _xfs_log_force(struct xfs_mount *mp,
                         xfs_lsn_t      lsn,
                         uint           flags,
                         int            *log_forced);
-#define xfs_log_force(mp, lsn, flags) \
+void      xfs_log_force(struct xfs_mount        *mp,
-        _xfs_log_force(mp, lsn, flags, NULL);
+                        xfs_lsn_t               lsn,
+                        uint                    flags);
 int       xfs_log_mount(struct xfs_mount        *mp,
                        struct xfs_buftarg      *log_target,
                        xfs_daddr_t             start_block,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c6244cc733c0..8952a392b5f3 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -242,7 +242,7 @@ typedef struct xlog_res {
 typedef struct xlog_ticket {
        sv_t               t_sema;       /* sleep on this semaphore      : 20 */
-        struct xlog_ticket *t_next;      /*                              :4|8 */
+        struct xlog_ticket *t_next;      /*                              :4|8 */
        struct xlog_ticket *t_prev;      /*                              :4|8 */
        xlog_tid_t         t_tid;        /* transaction identifier       : 4  */
        int                t_curr_res;   /* current reservation in bytes : 4  */
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
 * - ic_offset is the current number of bytes written to in this iclog.
 * - ic_refcnt is bumped when someone is writing to the log.
 * - ic_state is the state of the iclog.
+ *
+ * Because of cacheline contention on large machines, we need to separate
+ * various resources onto different cachelines. To start with, make the
+ * structure cacheline aligned. The following fields can be contended on
+ * by independent processes:
+ *
+ *      - ic_callback_*
+ *      - ic_refcnt
+ *      - fields protected by the global l_icloglock
+ *
+ * so we need to ensure that these fields are located in separate cachelines.
+ * We'll put all the read-only and l_icloglock fields in the first cacheline,
+ * and move everything else out to subsequent cachelines.
 */
 typedef struct xlog_iclog_fields {
        sv_t                    ic_forcesema;
@@ -332,17 +345,22 @@ typedef struct xlog_iclog_fields {
        struct xlog_in_core     *ic_prev;
        struct xfs_buf          *ic_bp;
        struct log              *ic_log;
-        xfs_log_callback_t      *ic_callback;
-        xfs_log_callback_t      **ic_callback_tail;
-#ifdef XFS_LOG_TRACE
-        struct ktrace           *ic_trace;
-#endif
        int                     ic_size;
        int                     ic_offset;
-        int                     ic_refcnt;
        int                     ic_bwritecnt;
        ushort_t                ic_state;
        char                    *ic_datap;      /* pointer to iclog data */
+#ifdef XFS_LOG_TRACE
+        struct ktrace           *ic_trace;
+#endif
+        /* Callback structures need their own cacheline */
+        spinlock_t              ic_callback_lock ____cacheline_aligned_in_smp;
+        xfs_log_callback_t      *ic_callback;
+        xfs_log_callback_t      **ic_callback_tail;
+        /* reference counts need their own cacheline */
+        atomic_t                ic_refcnt ____cacheline_aligned_in_smp;
 } xlog_iclog_fields_t;
 typedef union xlog_in_core2 {
@@ -366,6 +384,7 @@ typedef struct xlog_in_core {
 #define ic_bp           hic_fields.ic_bp
 #define ic_log          hic_fields.ic_log
 #define ic_callback     hic_fields.ic_callback
+#define ic_callback_lock hic_fields.ic_callback_lock
 #define ic_callback_tail hic_fields.ic_callback_tail
 #define ic_trace        hic_fields.ic_trace
 #define ic_size         hic_fields.ic_size
@@ -383,43 +402,46 @@ typedef struct xlog_in_core {
 * that round off problems won't occur when releasing partial reservations.
 */
 typedef struct log {
+        /* The following fields don't need locking */
+        struct xfs_mount        *l_mp;          /* mount point */
+        struct xfs_buf          *l_xbuf;        /* extra buffer for log
+                                                 * wrapping */
+        struct xfs_buftarg      *l_targ;        /* buftarg of log */
+        uint                    l_flags;
+        uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
+        struct xfs_buf_cancel   **l_buf_cancel_table;
+        int                     l_iclog_hsize;  /* size of iclog header */
+        int                     l_iclog_heads;  /* # of iclog header sectors */
+        uint                    l_sectbb_log;   /* log2 of sector size in BBs */
+        uint                    l_sectbb_mask;  /* sector size (in BBs)
+                                                 * alignment mask */
+        int                     l_iclog_size;   /* size of log in bytes */
+        int                     l_iclog_size_log; /* log power size of log */
+        int                     l_iclog_bufs;   /* number of iclog buffers */
+        xfs_daddr_t             l_logBBstart;   /* start block of log */
+        int                     l_logsize;      /* size of log in bytes */
+        int                     l_logBBsize;    /* size of log in BB chunks */
        /* The following block of fields are changed while holding icloglock */
-        sema_t                  l_flushsema;    /* iclog flushing semaphore */
+        sema_t                  l_flushsema ____cacheline_aligned_in_smp;
+                                                /* iclog flushing semaphore */
        int                     l_flushcnt;     /* # of procs waiting on this
                                                 * sema */
-        int                     l_ticket_cnt;   /* free ticket count */
-        int                     l_ticket_tcnt;  /* total ticket count */
        int                     l_covered_state;/* state of "covering disk
                                                 * log entries" */
-        xlog_ticket_t           *l_freelist;    /* free list of tickets */
-        xlog_ticket_t           *l_unmount_free;/* kmem_free these addresses */
-        xlog_ticket_t           *l_tail;        /* free list of tickets */
        xlog_in_core_t          *l_iclog;       /* head log queue       */
        spinlock_t              l_icloglock;    /* grab to change iclog state */
        xfs_lsn_t               l_tail_lsn;     /* lsn of 1st LR with unflushed
                                                 * buffers */
        xfs_lsn_t               l_last_sync_lsn;/* lsn of last LR on disk */
-        struct xfs_mount        *l_mp;          /* mount point */
-        struct xfs_buf          *l_xbuf;        /* extra buffer for log
-                                                 * wrapping */
-        struct xfs_buftarg      *l_targ;        /* buftarg of log */
-        xfs_daddr_t             l_logBBstart;   /* start block of log */
-        int                     l_logsize;      /* size of log in bytes */
-        int                     l_logBBsize;    /* size of log in BB chunks */
        int                     l_curr_cycle;   /* Cycle number of log writes */
        int                     l_prev_cycle;   /* Cycle number before last
                                                 * block increment */
        int                     l_curr_block;   /* current logical log block */
        int                     l_prev_block;   /* previous logical log block */
-        int                     l_iclog_size;   /* size of log in bytes */
-        int                     l_iclog_size_log; /* log power size of log */
-        int                     l_iclog_bufs;   /* number of iclog buffers */
-        /* The following field are used for debugging; need to hold icloglock */
-        char                    *l_iclog_bak[XLOG_MAX_ICLOGS];
        /* The following block of fields are changed while holding grant_lock */
-        spinlock_t              l_grant_lock;
+        spinlock_t              l_grant_lock ____cacheline_aligned_in_smp;
        xlog_ticket_t           *l_reserve_headq;
        xlog_ticket_t           *l_write_headq;
        int                     l_grant_reserve_cycle;
@@ -427,19 +449,16 @@ typedef struct log {
        int                     l_grant_write_cycle;
        int                     l_grant_write_bytes;
-        /* The following fields don't need locking */
 #ifdef XFS_LOG_TRACE
        struct ktrace           *l_trace;
        struct ktrace           *l_grant_trace;
 #endif
-        uint                    l_flags;
-        uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
+        /* The following field is used for debugging; need to hold icloglock */
-        struct xfs_buf_cancel   **l_buf_cancel_table;
+#ifdef DEBUG
-        int                     l_iclog_hsize;  /* size of iclog header */
+        char                    *l_iclog_bak[XLOG_MAX_ICLOGS];
-        int                     l_iclog_heads;  /* # of iclog header sectors */
+#endif
-        uint                    l_sectbb_log;   /* log2 of sector size in BBs */
-        uint                    l_sectbb_mask;  /* sector size (in BBs)
-                                                 * alignment mask */
 } xlog_t;
 #define XLOG_FORCED_SHUTDOWN(log)       ((log)->l_flags & XLOG_IO_ERROR)
@@ -459,6 +478,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void      xlog_put_bp(struct xfs_buf *);
 extern int       xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
+extern kmem_zone_t      *xfs_log_ticket_zone;
 /* iclog tracing */
 #define XLOG_TRACE_GRAB_FLUSH  1
 #define XLOG_TRACE_REL_FLUSH   2
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b2b70eba282c..e65ab4af0955 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -46,6 +46,7 @@
 #include "xfs_trans_priv.h"
 #include "xfs_quota.h"
 #include "xfs_rw.h"
+#include "xfs_utils.h"
 STATIC int      xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
 STATIC int      xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
@@ -120,7 +121,8 @@ xlog_bread(
        XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
        xfsbdstrat(log->l_mp, bp);
-        if ((error = xfs_iowait(bp)))
+        error = xfs_iowait(bp);
+        if (error)
                xfs_ioerror_alert("xlog_bread", log->l_mp,
                                  bp, XFS_BUF_ADDR(bp));
        return error;
@@ -191,7 +193,7 @@ xlog_header_check_dump(
 {
        int                     b;
-        cmn_err(CE_DEBUG, "%s:  SB : uuid = ", __FUNCTION__);
+        cmn_err(CE_DEBUG, "%s:  SB : uuid = ", __func__);
        for (b = 0; b < 16; b++)
                cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
        cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
@@ -1160,10 +1162,14 @@ xlog_write_log_records(
                if (j == 0 && (start_block + endcount > ealign)) {
                        offset = XFS_BUF_PTR(bp);
                        balign = BBTOB(ealign - start_block);
-                        XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb));
+                        error = XFS_BUF_SET_PTR(bp, offset + balign,
-                        if ((error = xlog_bread(log, ealign, sectbb, bp)))
+                                                BBTOB(sectbb));
+                        if (!error)
+                                error = xlog_bread(log, ealign, sectbb, bp);
+                        if (!error)
+                                error = XFS_BUF_SET_PTR(bp, offset, bufblks);
+                        if (error)
                                break;
-                        XFS_BUF_SET_PTR(bp, offset, bufblks);
                }
                offset = xlog_align(log, start_block, endcount, bp);
@@ -2280,7 +2286,9 @@ xlog_recover_do_inode_trans(
                 * invalidate the buffer when we write it out below.
                 */
                imap.im_blkno = 0;
-                xfs_imap(log->l_mp, NULL, ino, &imap, 0);
+                error = xfs_imap(log->l_mp, NULL, ino, &imap, 0);
+                if (error)
+                        goto error;
        }
        /*
@@ -2964,7 +2972,7 @@ xlog_recover_process_data(
 * Process an extent free intent item that was recovered from
 * the log.  We need to free the extents that it describes.
 */
-STATIC void
+STATIC int
 xlog_recover_process_efi(
        xfs_mount_t             *mp,
        xfs_efi_log_item_t      *efip)
@@ -2972,6 +2980,7 @@ xlog_recover_process_efi(
        xfs_efd_log_item_t      *efdp;
        xfs_trans_t             *tp;
        int                     i;
+        int                     error = 0;
        xfs_extent_t            *extp;
        xfs_fsblock_t           startblock_fsb;
@@ -2995,23 +3004,32 @@ xlog_recover_process_efi(
                         * free the memory associated with it.
                         */
                        xfs_efi_release(efip, efip->efi_format.efi_nextents);
-                        return;
+                        return XFS_ERROR(EIO);
                }
        }
        tp = xfs_trans_alloc(mp, 0);
-        xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+        error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+        if (error)
+                goto abort_error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &(efip->efi_format.efi_extents[i]);
-                xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+                error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+                if (error)
+                        goto abort_error;
                xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
                                         extp->ext_len);
        }
        efip->efi_flags |= XFS_EFI_RECOVERED;
-        xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
+        return error;
+abort_error:
+        xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+        return error;
 }
 /*
@@ -3059,7 +3077,7 @@ xlog_recover_check_ail(
 * everything already in the AIL, we stop processing as soon as
 * we see something other than an EFI in the AIL.
 */
-STATIC void
+STATIC int
 xlog_recover_process_efis(
        xlog_t                  *log)
 {
@@ -3067,6 +3085,7 @@ xlog_recover_process_efis(
        xfs_efi_log_item_t      *efip;
        int                     gen;
        xfs_mount_t             *mp;
+        int                     error = 0;
        mp = log->l_mp;
        spin_lock(&mp->m_ail_lock);
@@ -3091,11 +3110,14 @@ xlog_recover_process_efis(
                }
                spin_unlock(&mp->m_ail_lock);
-                xlog_recover_process_efi(mp, efip);
+                error = xlog_recover_process_efi(mp, efip);
+                if (error)
+                        return error;
                spin_lock(&mp->m_ail_lock);
                lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
        }
        spin_unlock(&mp->m_ail_lock);
+        return error;
 }
 /*
@@ -3115,21 +3137,18 @@ xlog_recover_clear_agi_bucket(
        int             error;
        tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
-        xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);
+        error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);
+        if (!error)
-        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                   XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
                                   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
-        if (error) {
+        if (error)
-                xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+                goto out_abort;
-                return;
-        }
+        error = EINVAL;
        agi = XFS_BUF_TO_AGI(agibp);
-        if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
+        if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC)
-                xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+                goto out_abort;
-                return;
-        }
        agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
        offset = offsetof(xfs_agi_t, agi_unlinked) +
@@ -3137,7 +3156,17 @@ xlog_recover_clear_agi_bucket(
        xfs_trans_log_buf(tp, agibp, offset,
                          (offset + sizeof(xfs_agino_t) - 1));
-        (void) xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
+        if (error)
+                goto out_error;
+        return;
+out_abort:
+        xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+out_error:
+        xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
+                        "failed to clear agi %d. Continuing.", agno);
+        return;
 }
 /*
@@ -3214,7 +3243,8 @@ xlog_recover_process_iunlinks(
                                         * next inode in the bucket.
                                         */
                                        error = xfs_itobp(mp, NULL, ip, &dip,
-                                                        &ibp, 0, 0);
+                                                        &ibp, 0, 0,
+                                                        XFS_BUF_LOCK);
                                        ASSERT(error || (dip != NULL));
                                }
@@ -3247,7 +3277,7 @@ xlog_recover_process_iunlinks(
                                        if (ip->i_d.di_mode == 0)
                                                xfs_iput_new(ip, 0);
                                        else
-                                                VN_RELE(XFS_ITOV(ip));
+                                                IRELE(ip);
                                } else {
                                        /*
                                         * We can't read in the inode
@@ -3445,7 +3475,7 @@ xlog_valid_rec_header(
            (!rhead->h_version ||
            (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
                xlog_warn("XFS: %s: unrecognised log version (%d).",
-                        __FUNCTION__, be32_to_cpu(rhead->h_version));
+                        __func__, be32_to_cpu(rhead->h_version));
                return XFS_ERROR(EIO);
        }
@@ -3604,15 +3634,19 @@ xlog_do_recovery_pass(
                                 *   _first_, then the log start (LR header end)
                                 *   - order is important.
                                 */
+                                wrapped_hblks = hblks - split_hblks;
                                bufaddr = XFS_BUF_PTR(hbp);
-                                XFS_BUF_SET_PTR(hbp,
+                                error = XFS_BUF_SET_PTR(hbp,
                                                bufaddr + BBTOB(split_hblks),
                                                BBTOB(hblks - split_hblks));
-                                wrapped_hblks = hblks - split_hblks;
+                                if (!error)
-                                error = xlog_bread(log, 0, wrapped_hblks, hbp);
+                                        error = xlog_bread(log, 0,
+                                                        wrapped_hblks, hbp);
+                                if (!error)
+                                        error = XFS_BUF_SET_PTR(hbp, bufaddr,
+                                                        BBTOB(hblks));
                                if (error)
                                        goto bread_err2;
-                                XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
                                if (!offset)
                                        offset = xlog_align(log, 0,
                                                        wrapped_hblks, hbp);
@@ -3664,13 +3698,18 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                bufaddr = XFS_BUF_PTR(dbp);
-                                XFS_BUF_SET_PTR(dbp,
+                                error = XFS_BUF_SET_PTR(dbp,
                                                bufaddr + BBTOB(split_bblks),
                                                BBTOB(bblks - split_bblks));
-                                if ((error = xlog_bread(log, wrapped_hblks,
+                                if (!error)
-                                                bblks - split_bblks, dbp)))
+                                        error = xlog_bread(log, wrapped_hblks,
+                                                        bblks - split_bblks,
+                                                        dbp);
+                                if (!error)
+                                        error = XFS_BUF_SET_PTR(dbp, bufaddr,
+                                                        h_size);
+                                if (error)
                                        goto bread_err2;
-                                XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
                                if (!offset)
                                        offset = xlog_align(log, wrapped_hblks,
                                                bblks - split_bblks, dbp);
@@ -3826,7 +3865,8 @@ xlog_do_recover(
        XFS_BUF_READ(bp);
        XFS_BUF_UNASYNC(bp);
        xfsbdstrat(log->l_mp, bp);
-        if ((error = xfs_iowait(bp))) {
+        error = xfs_iowait(bp);
+        if (error) {
                xfs_ioerror_alert("xlog_do_recover",
                                  log->l_mp, bp, XFS_BUF_ADDR(bp));
                ASSERT(0);
@@ -3917,7 +3957,14 @@ xlog_recover_finish(
         * rather than accepting new requests.
         */
        if (log->l_flags & XLOG_RECOVERY_NEEDED) {
-                xlog_recover_process_efis(log);
+                int     error;
+                error = xlog_recover_process_efis(log);
+                if (error) {
+                        cmn_err(CE_ALERT,
+                                "Failed to recover EFIs on filesystem: %s",
+                                log->l_mp->m_fsname);
+                        return error;
+                }
                /*
                 * Sync the log to get all the EFIs out of the AIL.
                 * This isn't absolutely necessary, but it helps in
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8ed164eb9544..2fec452afbcc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,8 +43,9 @@
 #include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
+#include "xfs_utils.h"
-STATIC void     xfs_mount_log_sb(xfs_mount_t *, __int64_t);
+STATIC int      xfs_mount_log_sb(xfs_mount_t *, __int64_t);
 STATIC int      xfs_uuid_mount(xfs_mount_t *);
 STATIC void     xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void     xfs_unmountfs_wait(xfs_mount_t *);
@@ -57,7 +58,7 @@ STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
 STATIC void     xfs_icsb_sync_counters(xfs_mount_t *);
 STATIC int      xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
                                                int64_t, int);
-STATIC int      xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
+STATIC void     xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 #else
@@ -956,7 +957,6 @@ xfs_mountfs(
 {
        xfs_sb_t        *sbp = &(mp->m_sb);
        xfs_inode_t     *rip;
-        bhv_vnode_t     *rvp = NULL;
        __uint64_t      resblks;
        __int64_t       update_flags = 0LL;
        uint            quotamount, quotaflags;
@@ -964,11 +964,6 @@ xfs_mountfs(
        int             uuid_mounted = 0;
        int             error = 0;
-        if (mp->m_sb_bp == NULL) {
-                error = xfs_readsb(mp, mfsi_flags);
-                if (error)
-                        return error;
-        }
        xfs_mount_common(mp, sbp);
        /*
@@ -1163,7 +1158,6 @@ xfs_mountfs(
        }
        ASSERT(rip != NULL);
-        rvp = XFS_ITOV(rip);
        if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
                cmn_err(CE_WARN, "XFS: corrupted root inode");
@@ -1195,8 +1189,13 @@ xfs_mountfs(
        /*
         * If fs is not mounted readonly, then update the superblock changes.
         */
-        if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY))
+        if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                xfs_mount_log_sb(mp, update_flags);
+                error = xfs_mount_log_sb(mp, update_flags);
+                if (error) {
+                        cmn_err(CE_WARN, "XFS: failed to write sb changes");
+                        goto error4;
+                }
+        }
        /*
         * Initialise the XFS quota management subsystem for this mount
@@ -1233,12 +1232,15 @@ xfs_mountfs(
         *
         * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
         * This may drive us straight to ENOSPC on mount, but that implies
-         * we were already there on the last unmount.
+         * we were already there on the last unmount. Warn if this occurs.
         */
        resblks = mp->m_sb.sb_dblocks;
        do_div(resblks, 20);
        resblks = min_t(__uint64_t, resblks, 1024);
-        xfs_reserve_blocks(mp, &resblks, NULL);
+        error = xfs_reserve_blocks(mp, &resblks, NULL);
+        if (error)
+                cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. "
+                                "Continuing without a reserve pool.");
        return 0;
@@ -1246,7 +1248,7 @@ xfs_mountfs(
        /*
         * Free up the root inode.
         */
-        VN_RELE(rvp);
+        IRELE(rip);
 error3:
        xfs_log_unmount_dealloc(mp);
 error2:
@@ -1274,6 +1276,7 @@ int
 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 {
        __uint64_t      resblks;
+        int             error = 0;
        /*
         * We can potentially deadlock here if we have an inode cluster
@@ -1317,9 +1320,15 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
         * value does not matter....
         */
        resblks = 0;
-        xfs_reserve_blocks(mp, &resblks, NULL);
+        error = xfs_reserve_blocks(mp, &resblks, NULL);
+        if (error)
+                cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. "
+                                "Freespace may not be correct on next mount.");
-        xfs_log_sbcount(mp, 1);
+        error = xfs_log_sbcount(mp, 1);
+        if (error)
+                cmn_err(CE_WARN, "XFS: Unable to update superblock counters. "
+                                "Freespace may not be correct on next mount.");
        xfs_unmountfs_writesb(mp);
        xfs_unmountfs_wait(mp);                 /* wait for async bufs */
        xfs_log_unmount(mp);                    /* Done! No more fs ops. */
@@ -1411,9 +1420,8 @@ xfs_log_sbcount(
        xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
        if (sync)
                xfs_trans_set_sync(tp);
-        xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
+        return error;
-        return 0;
 }
 STATIC void
@@ -1462,7 +1470,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                XFS_BUF_UNASYNC(sbp);
                ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
                xfsbdstrat(mp, sbp);
-                /* Nevermind errors we might get here. */
                error = xfs_iowait(sbp);
                if (error)
                        xfs_ioerror_alert("xfs_unmountfs_writesb",
@@ -1911,24 +1918,27 @@ xfs_uuid_unmount(
 * be altered by the mount options, as well as any potential sb_features2
 * fixup. Only the first superblock is updated.
 */
-STATIC void
+STATIC int
 xfs_mount_log_sb(
        xfs_mount_t     *mp,
        __int64_t       fields)
 {
        xfs_trans_t     *tp;
+        int             error;
        ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
                         XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
        tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
-        if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+        error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-                                XFS_DEFAULT_LOG_COUNT)) {
+                                XFS_DEFAULT_LOG_COUNT);
+        if (error) {
                xfs_trans_cancel(tp, 0);
-                return;
+                return error;
        }
        xfs_mod_sb(tp, fields);
-        xfs_trans_commit(tp, 0);
+        error = xfs_trans_commit(tp, 0);
+        return error;
 }
@@ -2189,7 +2199,7 @@ xfs_icsb_counter_disabled(
        return test_bit(field, &mp->m_icsb_counters);
 }
-STATIC int
+STATIC void
 xfs_icsb_disable_counter(
        xfs_mount_t     *mp,
        xfs_sb_field_t  field)
@@ -2207,7 +2217,7 @@ xfs_icsb_disable_counter(
         * the m_icsb_mutex.
         */
        if (xfs_icsb_counter_disabled(mp, field))
-                return 0;
+                return;
        xfs_icsb_lock_all_counters(mp);
        if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
@@ -2230,8 +2240,6 @@ xfs_icsb_disable_counter(
        }
        xfs_icsb_unlock_all_counters(mp);
-        return 0;
 }
 STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d8a4728d847..1ed575110ff0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,17 +66,17 @@ struct xfs_mru_cache;
 * Prototypes and functions for the Data Migration subsystem.
 */
-typedef int     (*xfs_send_data_t)(int, bhv_vnode_t *,
+typedef int     (*xfs_send_data_t)(int, struct xfs_inode *,
-                        xfs_off_t, size_t, int, bhv_vrwlock_t *);
+                        xfs_off_t, size_t, int, int *);
 typedef int     (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
-typedef int     (*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t);
+typedef int     (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
 typedef int     (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
-                        bhv_vnode_t *,
+                        struct xfs_inode *, dm_right_t,
-                        dm_right_t, bhv_vnode_t *, dm_right_t,
+                        struct xfs_inode *, dm_right_t,
-                        char *, char *, mode_t, int, int);
+                        const char *, const char *, mode_t, int, int);
 typedef int     (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
                        char *, char *);
-typedef void    (*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *,
+typedef void    (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
                        dm_right_t, mode_t, int, int);
 typedef struct xfs_dmops {
@@ -88,20 +88,20 @@ typedef struct xfs_dmops {
        xfs_send_unmount_t      xfs_send_unmount;
 } xfs_dmops_t;
-#define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \
+#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
-        (*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock)
+        (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
 #define XFS_SEND_MMAP(mp, vma,fl) \
        (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
-#define XFS_SEND_DESTROY(mp, vp,right) \
+#define XFS_SEND_DESTROY(mp, ip,right) \
-        (*(mp)->m_dm_ops->xfs_send_destroy)(vp,right)
+        (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
 #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
        (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
        (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_MOUNT(mp,right,path,name) \
        (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
-#define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \
+#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \
-        (*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl)
+        (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
 /*
@@ -220,7 +220,7 @@ extern void	xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
 #endif
 typedef struct xfs_ail {
-        xfs_ail_entry_t         xa_ail;
+        struct list_head        xa_ail;
        uint                    xa_gen;
        struct task_struct      *xa_task;
        xfs_lsn_t               xa_target;
@@ -401,7 +401,7 @@ typedef struct xfs_mount {
 /*
 * Allow large block sizes to be reported to userspace programs if the
- * "largeio" mount option is used. 
+ * "largeio" mount option is used.
 *
 * If compatibility mode is specified, simply return the basic unit of caching
 * so that we don't get inefficient read/modify/write I/O from user apps.
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 7eb157a59f9e..ee371890d85d 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -36,7 +36,6 @@
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
 #include "xfs_vnodeops.h"
@@ -84,25 +83,23 @@ int xfs_rename_skip, xfs_rename_nskip;
 */
 STATIC int
 xfs_lock_for_rename(
-        xfs_inode_t     *dp1,   /* old (source) directory inode */
+        xfs_inode_t     *dp1,   /* in: old (source) directory inode */
-        xfs_inode_t     *dp2,   /* new (target) directory inode */
+        xfs_inode_t     *dp2,   /* in: new (target) directory inode */
-        bhv_vname_t     *vname1,/* old entry name */
+        xfs_inode_t     *ip1,   /* in: inode of old entry */
-        bhv_vname_t     *vname2,/* new entry name */
+        struct xfs_name *name2, /* in: new entry name */
-        xfs_inode_t     **ipp1, /* inode of old entry */
+        xfs_inode_t     **ipp2, /* out: inode of new entry, if it
-        xfs_inode_t     **ipp2, /* inode of new entry, if it
                                   already exists, NULL otherwise. */
-        xfs_inode_t     **i_tab,/* array of inode returned, sorted */
+        xfs_inode_t     **i_tab,/* out: array of inode returned, sorted */
-        int             *num_inodes)  /* number of inodes in array */
+        int             *num_inodes)  /* out: number of inodes in array */
 {
-        xfs_inode_t             *ip1, *ip2, *temp;
+        xfs_inode_t             *ip2 = NULL;
+        xfs_inode_t             *temp;
        xfs_ino_t               inum1, inum2;
        int                     error;
        int                     i, j;
        uint                    lock_mode;
        int                     diff_dirs = (dp1 != dp2);
-        ip2 = NULL;
        /*
         * First, find out the current inums of the entries so that we
         * can determine the initial locking order.  We'll have to
@@ -110,27 +107,20 @@ xfs_lock_for_rename(
         * to see if we still have the right inodes, directories, etc.
         */
        lock_mode = xfs_ilock_map_shared(dp1);
-        error = xfs_get_dir_entry(vname1, &ip1);
+        IHOLD(ip1);
-        if (error) {
+        xfs_itrace_ref(ip1);
-                xfs_iunlock_map_shared(dp1, lock_mode);
-                return error;
-        }
        inum1 = ip1->i_ino;
-        ASSERT(ip1);
-        xfs_itrace_ref(ip1);
        /*
         * Unlock dp1 and lock dp2 if they are different.
         */
        if (diff_dirs) {
                xfs_iunlock_map_shared(dp1, lock_mode);
                lock_mode = xfs_ilock_map_shared(dp2);
        }
-        error = xfs_dir_lookup_int(dp2, lock_mode, vname2, &inum2, &ip2);
+        error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
        if (error == ENOENT) {          /* target does not need to exist. */
                inum2 = 0;
        } else if (error) {
@@ -162,6 +152,7 @@ xfs_lock_for_rename(
                *num_inodes = 4;
                i_tab[3] = ip2;
        }
+        *ipp2 = i_tab[3];
        /*
         * Sort the elements via bubble sort.  (Remember, there are at
@@ -199,21 +190,6 @@ xfs_lock_for_rename(
                xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
        }
-        /*
-         * Set the return value. Null out any unused entries in i_tab.
-         */
-        *ipp1 = *ipp2 = NULL;
-        for (i=0; i < *num_inodes; i++) {
-                if (i_tab[i]->i_ino == inum1) {
-                        *ipp1 = i_tab[i];
-                }
-                if (i_tab[i]->i_ino == inum2) {
-                        *ipp2 = i_tab[i];
-                }
-        }
-        for (;i < 4; i++) {
-                i_tab[i] = NULL;
-        }
        return 0;
 }
@@ -223,13 +199,13 @@ xfs_lock_for_rename(
 int
 xfs_rename(
        xfs_inode_t     *src_dp,
-        bhv_vname_t     *src_vname,
+        struct xfs_name *src_name,
-        bhv_vnode_t     *target_dir_vp,
+        xfs_inode_t     *src_ip,
-        bhv_vname_t     *target_vname)
+        xfs_inode_t     *target_dp,
+        struct xfs_name *target_name)
 {
-        bhv_vnode_t     *src_dir_vp = XFS_ITOV(src_dp);
        xfs_trans_t     *tp;
-        xfs_inode_t     *target_dp, *src_ip, *target_ip;
+        xfs_inode_t     *target_ip;
        xfs_mount_t     *mp = src_dp->i_mount;
        int             new_parent;             /* moving to a new dir */
        int             src_is_directory;       /* src_name is a directory */
@@ -243,29 +219,16 @@ xfs_rename(
        int             spaceres;
        int             target_link_zero = 0;
        int             num_inodes;
-        char            *src_name = VNAME(src_vname);
-        char            *target_name = VNAME(target_vname);
-        int             src_namelen = VNAMELEN(src_vname);
-        int             target_namelen = VNAMELEN(target_vname);
        xfs_itrace_entry(src_dp);
-        xfs_itrace_entry(xfs_vtoi(target_dir_vp));
+        xfs_itrace_entry(target_dp);
-        /*
-         * Find the XFS behavior descriptor for the target directory
-         * vnode since it was not handed to us.
-         */
-        target_dp = xfs_vtoi(target_dir_vp);
-        if (target_dp == NULL) {
-                return XFS_ERROR(EXDEV);
-        }
        if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
            DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
-                                        src_dir_vp, DM_RIGHT_NULL,
+                                        src_dp, DM_RIGHT_NULL,
-                                        target_dir_vp, DM_RIGHT_NULL,
+                                        target_dp, DM_RIGHT_NULL,
-                                        src_name, target_name,
+                                        src_name->name, target_name->name,
                                        0, 0, 0);
                if (error) {
                        return error;
@@ -282,10 +245,8 @@ xfs_rename(
         * does not exist in the source directory.
         */
        tp = NULL;
-        error = xfs_lock_for_rename(src_dp, target_dp, src_vname,
+        error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
-                        target_vname, &src_ip, &target_ip, inodes,
+                                        &target_ip, inodes, &num_inodes);
-                        &num_inodes);
        if (error) {
                /*
                 * We have nothing locked, no inode references, and
@@ -331,7 +292,7 @@ xfs_rename(
        XFS_BMAP_INIT(&free_list, &first_block);
        tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-        spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen);
+        spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
        error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0,
                        XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
        if (error == ENOSPC) {
@@ -365,10 +326,10 @@ xfs_rename(
         * them when they unlock the inodes.  Also, we need to be careful
         * not to add an inode to the transaction more than once.
         */
-        VN_HOLD(src_dir_vp);
+        IHOLD(src_dp);
        xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
        if (new_parent) {
-                VN_HOLD(target_dir_vp);
+                IHOLD(target_dp);
                xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
        }
        if ((src_ip != src_dp) && (src_ip != target_dp)) {
@@ -389,9 +350,8 @@ xfs_rename(
                 * If there's no space reservation, check the entry will
                 * fit before actually inserting it.
                 */
-                if (spaceres == 0 &&
+                error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
-                    (error = xfs_dir_canenter(tp, target_dp, target_name,
+                if (error)
-                                                target_namelen)))
                        goto error_return;
                /*
                 * If target does not exist and the rename crosses
@@ -399,8 +359,8 @@ xfs_rename(
                 * to account for the ".." reference from the new entry.
                 */
                error = xfs_dir_createname(tp, target_dp, target_name,
-                                           target_namelen, src_ip->i_ino,
+                                                src_ip->i_ino, &first_block,
-                                           &first_block, &free_list, spaceres);
+                                                &free_list, spaceres);
                if (error == ENOSPC)
                        goto error_return;
                if (error)
@@ -439,7 +399,7 @@ xfs_rename(
                 * name at the destination directory, remove it first.
                 */
                error = xfs_dir_replace(tp, target_dp, target_name,
-                                        target_namelen, src_ip->i_ino,
+                                        src_ip->i_ino,
                                        &first_block, &free_list, spaceres);
                if (error)
                        goto abort_return;
@@ -476,7 +436,8 @@ xfs_rename(
                 * Rewrite the ".." entry to point to the new
                 * directory.
                 */
-                error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino,
+                error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
+                                        target_dp->i_ino,
                                        &first_block, &free_list, spaceres);
                ASSERT(error != EEXIST);
                if (error)
@@ -512,8 +473,8 @@ xfs_rename(
                        goto abort_return;
        }
-        error = xfs_dir_removename(tp, src_dp, src_name, src_namelen,
+        error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-                        src_ip->i_ino, &first_block, &free_list, spaceres);
+                                        &first_block, &free_list, spaceres);
        if (error)
                goto abort_return;
        xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -580,10 +541,8 @@ xfs_rename(
         * the vnode references.
         */
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-        if (target_ip != NULL) {
+        if (target_ip != NULL)
-                xfs_refcache_purge_ip(target_ip);
                IRELE(target_ip);
-        }
        /*
         * Let interposed file systems know about removed links.
         */
@@ -598,9 +557,9 @@ std_return:
        if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
            DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
                (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
-                                        src_dir_vp, DM_RIGHT_NULL,
+                                        src_dp, DM_RIGHT_NULL,
-                                        target_dir_vp, DM_RIGHT_NULL,
+                                        target_dp, DM_RIGHT_NULL,
-                                        src_name, target_name,
+                                        src_name->name, target_name->name,
                                        0, error, 0);
        }
        return error;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 47082c01872d..a0dc6e5bc5b9 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -44,6 +44,7 @@
 #include "xfs_rw.h"
 #include "xfs_inode_item.h"
 #include "xfs_trans_space.h"
+#include "xfs_utils.h"
 /*
@@ -123,14 +124,14 @@ xfs_growfs_rt_alloc(
                                XFS_GROWRTALLOC_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_DEFAULT_PERM_LOG_COUNT)))
-                        goto error_exit;
+                        goto error_cancel;
                cancelflags = XFS_TRANS_RELEASE_LOG_RES;
                /*
                 * Lock the inode.
                 */
                if ((error = xfs_trans_iget(mp, tp, ino, 0,
                                                XFS_ILOCK_EXCL, &ip)))
-                        goto error_exit;
+                        goto error_cancel;
                XFS_BMAP_INIT(&flist, &firstblock);
                /*
                 * Allocate blocks to the bitmap file.
@@ -143,14 +144,16 @@ xfs_growfs_rt_alloc(
                if (!error && nmap < 1)
                        error = XFS_ERROR(ENOSPC);
                if (error)
-                        goto error_exit;
+                        goto error_cancel;
                /*
                 * Free any blocks freed up in the transaction, then commit.
                 */
                error = xfs_bmap_finish(&tp, &flist, &committed);
                if (error)
-                        goto error_exit;
+                        goto error_cancel;
-                xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+                if (error)
+                        goto error;
                /*
                 * Now we need to clear the allocated blocks.
                 * Do this one block per transaction, to keep it simple.
@@ -165,13 +168,13 @@ xfs_growfs_rt_alloc(
                         */
                        if ((error = xfs_trans_reserve(tp, 0,
                                        XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0)))
-                                goto error_exit;
+                                goto error_cancel;
                        /*
                         * Lock the bitmap inode.
                         */
                        if ((error = xfs_trans_iget(mp, tp, ino, 0,
                                                        XFS_ILOCK_EXCL, &ip)))
-                                goto error_exit;
+                                goto error_cancel;
                        /*
                         * Get a buffer for the block.
                         */
@@ -180,14 +183,16 @@ xfs_growfs_rt_alloc(
                                mp->m_bsize, 0);
                        if (bp == NULL) {
                                error = XFS_ERROR(EIO);
-                                goto error_exit;
+                                goto error_cancel;
                        }
                        memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
                        xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
                        /*
                         * Commit the transaction.
                         */
-                        xfs_trans_commit(tp, 0);
+                        error = xfs_trans_commit(tp, 0);
+                        if (error)
+                                goto error;
                }
                /*
                 * Go on to the next extent, if any.
@@ -195,8 +200,9 @@ xfs_growfs_rt_alloc(
                oblocks = map.br_startoff + map.br_blockcount;
        }
        return 0;
-error_exit:
+error_cancel:
        xfs_trans_cancel(tp, cancelflags);
+error:
        return error;
 }
@@ -1875,6 +1881,7 @@ xfs_growfs_rt(
        xfs_trans_t     *tp;            /* transaction pointer */
        sbp = &mp->m_sb;
+        cancelflags = 0;
        /*
         * Initial error checking.
         */
@@ -2041,13 +2048,15 @@ xfs_growfs_rt(
                 */
                mp->m_rsumlevels = nrsumlevels;
                mp->m_rsumsize = nrsumsize;
-                /*
-                 * Commit the transaction.
+                error = xfs_trans_commit(tp, 0);
-                 */
+                if (error) {
-                xfs_trans_commit(tp, 0);
+                        tp = NULL;
+                        break;
+                }
        }
-        if (error)
+        if (error && tp)
                xfs_trans_cancel(tp, cancelflags);
        /*
@@ -2278,7 +2287,7 @@ xfs_rtmount_inodes(
        ASSERT(sbp->sb_rsumino != NULLFSINO);
        error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
        if (error) {
-                VN_RELE(XFS_ITOV(mp->m_rbmip));
+                IRELE(mp->m_rbmip);
                return error;
        }
        ASSERT(mp->m_rsumip != NULL);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index cd3ece6cc918..b0f31c09a76d 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -126,11 +126,11 @@ xfs_write_sync_logforce(
                 * when we return.
                 */
                if (iip && iip->ili_last_lsn) {
-                        xfs_log_force(mp, iip->ili_last_lsn,
+                        error = _xfs_log_force(mp, iip->ili_last_lsn,
-                                        XFS_LOG_FORCE | XFS_LOG_SYNC);
+                                        XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
                } else if (xfs_ipincount(ip) > 0) {
-                        xfs_log_force(mp, (xfs_lsn_t)0,
+                        error = _xfs_log_force(mp, (xfs_lsn_t)0,
-                                        XFS_LOG_FORCE | XFS_LOG_SYNC);
+                                        XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
                }
        } else {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7f40628d85c7..0804207c7391 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -113,13 +113,8 @@ struct xfs_mount;
 struct xfs_trans;
 struct xfs_dquot_acct;
-typedef struct xfs_ail_entry {
-        struct xfs_log_item     *ail_forw;      /* AIL forw pointer */
-        struct xfs_log_item     *ail_back;      /* AIL back pointer */
-} xfs_ail_entry_t;
 typedef struct xfs_log_item {
-        xfs_ail_entry_t                 li_ail;         /* AIL pointers */
+        struct list_head                li_ail;         /* AIL pointers */
        xfs_lsn_t                       li_lsn;         /* last on-disk lsn */
        struct xfs_log_item_desc        *li_desc;       /* ptr to current desc*/
        struct xfs_mount                *li_mountp;     /* ptr to fs mount */
@@ -341,7 +336,6 @@ typedef struct xfs_trans {
        unsigned int            t_rtx_res;      /* # of rt extents resvd */
        unsigned int            t_rtx_res_used; /* # of resvd rt extents used */
        xfs_log_ticket_t        t_ticket;       /* log mgr ticket */
-        sema_t                  t_sema;         /* sema for commit completion */
        xfs_lsn_t               t_lsn;          /* log seq num of start of
                                                 * transaction. */
        xfs_lsn_t               t_commit_lsn;   /* log seq num of end of
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 76d470d8a1e6..1f77c00af566 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,13 +28,13 @@
 #include "xfs_trans_priv.h"
 #include "xfs_error.h"
-STATIC void xfs_ail_insert(xfs_ail_entry_t *, xfs_log_item_t *);
+STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_entry_t *, xfs_log_item_t *);
+STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *);
+STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *);
-STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *);
+STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *);
 #ifdef DEBUG
-STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *);
+STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
 #else
 #define xfs_ail_check(a,l)
 #endif /* DEBUG */
@@ -57,7 +57,7 @@ xfs_trans_tail_ail(
        xfs_log_item_t  *lip;
        spin_lock(&mp->m_ail_lock);
-        lip = xfs_ail_min(&(mp->m_ail.xa_ail));
+        lip = xfs_ail_min(&mp->m_ail);
        if (lip == NULL) {
                lsn = (xfs_lsn_t)0;
        } else {
@@ -91,7 +91,7 @@ xfs_trans_push_ail(
 {
        xfs_log_item_t          *lip;
-        lip = xfs_ail_min(&mp->m_ail.xa_ail);
+        lip = xfs_ail_min(&mp->m_ail);
        if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
                if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
                        xfsaild_wakeup(mp, threshold_lsn);
@@ -111,15 +111,17 @@ xfs_trans_first_push_ail(
 {
        xfs_log_item_t  *lip;
-        lip = xfs_ail_min(&(mp->m_ail.xa_ail));
+        lip = xfs_ail_min(&mp->m_ail);
        *gen = (int)mp->m_ail.xa_gen;
        if (lsn == 0)
                return lip;
-        while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0))
+        list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) {
-                lip = lip->li_ail.ail_forw;
+                if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
+                        return lip;
+        }
-        return lip;
+        return NULL;
 }
 /*
@@ -329,7 +331,7 @@ xfs_trans_unlocked_item(
         * the call to xfs_log_move_tail() doesn't do anything if there's
         * not enough free space to wake people up so we're safe calling it.
         */
-        min_lip = xfs_ail_min(&mp->m_ail.xa_ail);
+        min_lip = xfs_ail_min(&mp->m_ail);
        if (min_lip == lip)
                xfs_log_move_tail(mp, 1);
@@ -357,15 +359,13 @@ xfs_trans_update_ail(
        xfs_log_item_t  *lip,
        xfs_lsn_t       lsn) __releases(mp->m_ail_lock)
 {
-        xfs_ail_entry_t         *ailp;
        xfs_log_item_t          *dlip=NULL;
        xfs_log_item_t          *mlip;  /* ptr to minimum lip */
-        ailp = &(mp->m_ail.xa_ail);
+        mlip = xfs_ail_min(&mp->m_ail);
-        mlip = xfs_ail_min(ailp);
        if (lip->li_flags & XFS_LI_IN_AIL) {
-                dlip = xfs_ail_delete(ailp, lip);
+                dlip = xfs_ail_delete(&mp->m_ail, lip);
                ASSERT(dlip == lip);
        } else {
                lip->li_flags |= XFS_LI_IN_AIL;
@@ -373,11 +373,11 @@ xfs_trans_update_ail(
        lip->li_lsn = lsn;
-        xfs_ail_insert(ailp, lip);
+        xfs_ail_insert(&mp->m_ail, lip);
        mp->m_ail.xa_gen++;
        if (mlip == dlip) {
-                mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
+                mlip = xfs_ail_min(&mp->m_ail);
                spin_unlock(&mp->m_ail_lock);
                xfs_log_move_tail(mp, mlip->li_lsn);
        } else {
@@ -407,14 +407,12 @@ xfs_trans_delete_ail(
        xfs_mount_t     *mp,
        xfs_log_item_t  *lip) __releases(mp->m_ail_lock)
 {
-        xfs_ail_entry_t         *ailp;
        xfs_log_item_t          *dlip;
        xfs_log_item_t          *mlip;
        if (lip->li_flags & XFS_LI_IN_AIL) {
-                ailp = &(mp->m_ail.xa_ail);
+                mlip = xfs_ail_min(&mp->m_ail);
-                mlip = xfs_ail_min(ailp);
+                dlip = xfs_ail_delete(&mp->m_ail, lip);
-                dlip = xfs_ail_delete(ailp, lip);
                ASSERT(dlip == lip);
@@ -423,7 +421,7 @@ xfs_trans_delete_ail(
                mp->m_ail.xa_gen++;
                if (mlip == dlip) {
-                        mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
+                        mlip = xfs_ail_min(&mp->m_ail);
                        spin_unlock(&mp->m_ail_lock);
                        xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
                } else {
@@ -440,7 +438,7 @@ xfs_trans_delete_ail(
                else {
                        xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
                "%s: attempting to delete a log item that is not in the AIL",
-                                        __FUNCTION__);
+                                        __func__);
                        spin_unlock(&mp->m_ail_lock);
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                }
@@ -461,7 +459,7 @@ xfs_trans_first_ail(
 {
        xfs_log_item_t  *lip;
-        lip = xfs_ail_min(&(mp->m_ail.xa_ail));
+        lip = xfs_ail_min(&mp->m_ail);
        *gen = (int)mp->m_ail.xa_gen;
        return lip;
@@ -485,9 +483,9 @@ xfs_trans_next_ail(
        ASSERT(mp && lip && gen);
        if (mp->m_ail.xa_gen == *gen) {
-                nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip);
+                nlip = xfs_ail_next(&mp->m_ail, lip);
        } else {
-                nlip = xfs_ail_min(&(mp->m_ail).xa_ail);
+                nlip = xfs_ail_min(&mp->m_ail);
                *gen = (int)mp->m_ail.xa_gen;
                if (restarts != NULL) {
                        XFS_STATS_INC(xs_push_ail_restarts);
@@ -517,8 +515,7 @@ int
 xfs_trans_ail_init(
        xfs_mount_t     *mp)
 {
-        mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail;
+        INIT_LIST_HEAD(&mp->m_ail.xa_ail);
-        mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail;
        return xfsaild_start(mp);
 }
@@ -537,7 +534,7 @@ xfs_trans_ail_destroy(
 */
 STATIC void
 xfs_ail_insert(
-        xfs_ail_entry_t *base,
+        xfs_ail_t       *ailp,
        xfs_log_item_t  *lip)
 /* ARGSUSED */
 {
@@ -546,27 +543,22 @@ xfs_ail_insert(
        /*
         * If the list is empty, just insert the item.
         */
-        if (base->ail_back == (xfs_log_item_t*)base) {
+        if (list_empty(&ailp->xa_ail)) {
-                base->ail_forw = lip;
+                list_add(&lip->li_ail, &ailp->xa_ail);
-                base->ail_back = lip;
-                lip->li_ail.ail_forw = (xfs_log_item_t*)base;
-                lip->li_ail.ail_back = (xfs_log_item_t*)base;
                return;
        }
-        next_lip = base->ail_back;
+        list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
-        while ((next_lip != (xfs_log_item_t*)base) &&
+                if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
-               (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) > 0)) {
+                        break;
-                next_lip = next_lip->li_ail.ail_back;
        }
-        ASSERT((next_lip == (xfs_log_item_t*)base) ||
+        ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
               (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
-        lip->li_ail.ail_forw = next_lip->li_ail.ail_forw;
-        lip->li_ail.ail_back = next_lip;
-        next_lip->li_ail.ail_forw = lip;
-        lip->li_ail.ail_forw->li_ail.ail_back = lip;
-        xfs_ail_check(base, lip);
+        list_add(&lip->li_ail, &next_lip->li_ail);
+        xfs_ail_check(ailp, lip);
        return;
 }
@@ -576,15 +568,13 @@ xfs_ail_insert(
 /*ARGSUSED*/
 STATIC xfs_log_item_t *
 xfs_ail_delete(
-        xfs_ail_entry_t *base,
+        xfs_ail_t       *ailp,
        xfs_log_item_t  *lip)
 /* ARGSUSED */
 {
-        xfs_ail_check(base, lip);
+        xfs_ail_check(ailp, lip);
-        lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back;
-        lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw;
+        list_del(&lip->li_ail);
-        lip->li_ail.ail_forw = NULL;
-        lip->li_ail.ail_back = NULL;
        return lip;
 }
@@ -595,14 +585,13 @@ xfs_ail_delete(
 */
 STATIC xfs_log_item_t *
 xfs_ail_min(
-        xfs_ail_entry_t *base)
+        xfs_ail_t       *ailp)
 /* ARGSUSED */
 {
-        register xfs_log_item_t *forw = base->ail_forw;
+        if (list_empty(&ailp->xa_ail))
-        if (forw == (xfs_log_item_t*)base) {
                return NULL;
-        }
-        return forw;
+        return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
 }
 /*
@@ -612,15 +601,14 @@ xfs_ail_min(
 */
 STATIC xfs_log_item_t *
 xfs_ail_next(
-        xfs_ail_entry_t *base,
+        xfs_ail_t       *ailp,
        xfs_log_item_t  *lip)
 /* ARGSUSED */
 {
-        if (lip->li_ail.ail_forw == (xfs_log_item_t*)base) {
+        if (lip->li_ail.next == &ailp->xa_ail)
                return NULL;
-        }
-        return lip->li_ail.ail_forw;
+        return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
 }
 #ifdef DEBUG
@@ -629,57 +617,40 @@ xfs_ail_next(
 */
 STATIC void
 xfs_ail_check(
-        xfs_ail_entry_t *base,
+        xfs_ail_t       *ailp,
        xfs_log_item_t  *lip)
 {
        xfs_log_item_t  *prev_lip;
-        prev_lip = base->ail_forw;
+        if (list_empty(&ailp->xa_ail))
-        if (prev_lip == (xfs_log_item_t*)base) {
-                /*
-                 * Make sure the pointers are correct when the list
-                 * is empty.
-                 */
-                ASSERT(base->ail_back == (xfs_log_item_t*)base);
                return;
-        }
        /*
         * Check the next and previous entries are valid.
         */
        ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
-        prev_lip = lip->li_ail.ail_back;
+        prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
-        if (prev_lip != (xfs_log_item_t*)base) {
+        if (&prev_lip->li_ail != &ailp->xa_ail)
-                ASSERT(prev_lip->li_ail.ail_forw == lip);
                ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
-        }
-        prev_lip = lip->li_ail.ail_forw;
+        prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
-        if (prev_lip != (xfs_log_item_t*)base) {
+        if (&prev_lip->li_ail != &ailp->xa_ail)
-                ASSERT(prev_lip->li_ail.ail_back == lip);
                ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
-        }
 #ifdef XFS_TRANS_DEBUG
        /*
-         * Walk the list checking forward and backward pointers,
+         * Walk the list checking lsn ordering, and that every entry has the
-         * lsn ordering, and that every entry has the XFS_LI_IN_AIL
+         * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
-         * flag set. This is really expensive, so only do it when
+         * when specifically debugging the transaction subsystem.
-         * specifically debugging the transaction subsystem.
         */
-        prev_lip = (xfs_log_item_t*)base;
+        prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
-        while (lip != (xfs_log_item_t*)base) {
+        list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
-                if (prev_lip != (xfs_log_item_t*)base) {
+                if (&prev_lip->li_ail != &ailp->xa_ail)
-                        ASSERT(prev_lip->li_ail.ail_forw == lip);
                        ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
-                }
-                ASSERT(lip->li_ail.ail_back == prev_lip);
                ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
                prev_lip = lip;
-                lip = lip->li_ail.ail_forw;
        }
-        ASSERT(lip == (xfs_log_item_t*)base);
-        ASSERT(base->ail_back == prev_lip);
 #endif /* XFS_TRANS_DEBUG */
 }
 #endif /* DEBUG */
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..cb0c5839154b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
        if (tp == NULL) {
                bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
                if (!bp)
-                        return XFS_ERROR(ENOMEM);
+                        return (flags & XFS_BUF_TRYLOCK) ?
+                                        EAGAIN : XFS_ERROR(ENOMEM);
                if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
                        xfs_ioerror_alert("xfs_trans_read_buf", mp,
@@ -353,17 +354,15 @@ xfs_trans_read_buf(
                        ASSERT(!XFS_BUF_ISASYNC(bp));
                        XFS_BUF_READ(bp);
                        xfsbdstrat(tp->t_mountp, bp);
-                        xfs_iowait(bp);
+                        error = xfs_iowait(bp);
-                        if (XFS_BUF_GETERROR(bp) != 0) {
+                        if (error) {
                                xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                                  bp, blkno);
-                                error = XFS_BUF_GETERROR(bp);
                                xfs_buf_relse(bp);
                                /*
-                                 * We can gracefully recover from most
+                                 * We can gracefully recover from most read
-                                 * read errors. Ones we can't are those
+                                 * errors. Ones we can't are those that happen
-                                 * that happen after the transaction's
+                                 * after the transaction's already dirty.
-                                 * already dirty.
                                 */
                                if (tp->t_flags & XFS_TRANS_DIRTY)
                                        xfs_force_shutdown(tp->t_mountp,
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 5c89be475464..0f5191644ab2 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -160,4 +160,9 @@ typedef enum {
        XFS_BTNUM_MAX
 } xfs_btnum_t;
+struct xfs_name {
+        const char      *name;
+        int             len;
+};
 #endif  /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 18a85e746680..2b8dc7e40772 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -40,34 +40,12 @@
 #include "xfs_itable.h"
 #include "xfs_utils.h"
-/*
- * xfs_get_dir_entry is used to get a reference to an inode given
- * its parent directory inode and the name of the file.  It does
- * not lock the child inode, and it unlocks the directory before
- * returning.  The directory's generation number is returned for
- * use by a later call to xfs_lock_dir_and_entry.
- */
-int
-xfs_get_dir_entry(
-        bhv_vname_t     *dentry,
-        xfs_inode_t     **ipp)
-{
-        bhv_vnode_t     *vp;
-        vp = VNAME_TO_VNODE(dentry);
-        *ipp = xfs_vtoi(vp);
-        if (!*ipp)
-                return XFS_ERROR(ENOENT);
-        VN_HOLD(vp);
-        return 0;
-}
 int
 xfs_dir_lookup_int(
        xfs_inode_t     *dp,
        uint            lock_mode,
-        bhv_vname_t     *dentry,
+        struct xfs_name *name,
        xfs_ino_t       *inum,
        xfs_inode_t     **ipp)
 {
@@ -75,7 +53,7 @@ xfs_dir_lookup_int(
        xfs_itrace_entry(dp);
-        error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum);
+        error = xfs_dir_lookup(NULL, dp, name, inum);
        if (!error) {
                /*
                 * Unlock the directory. We do this because we can't
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f857fcccb723..175b126d2cab 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,15 +21,14 @@
 #define IRELE(ip)       VN_RELE(XFS_ITOV(ip))
 #define IHOLD(ip)       VN_HOLD(XFS_ITOV(ip))
-extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
+extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
-extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *,
+                                xfs_ino_t *, xfs_inode_t **);
-                                xfs_inode_t **);
+extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
-extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
+extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
-extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
                                xfs_dev_t, cred_t *, prid_t, int,
                                xfs_inode_t **, int *);
-extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *);
+extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
-extern int xfs_bumplink (xfs_trans_t *, xfs_inode_t *);
+extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
-extern void xfs_bump_ino_vers2 (xfs_trans_t *, xfs_inode_t *);
+extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
 #endif  /* __XFS_UTILS_H__ */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7094caff13cf..fc48158fe479 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -43,7 +43,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_buf_item.h"
 #include "xfs_log_priv.h"
 #include "xfs_dir2_trace.h"
@@ -56,6 +55,7 @@
 #include "xfs_fsops.h"
 #include "xfs_vnodeops.h"
 #include "xfs_vfsops.h"
+#include "xfs_utils.h"
 int __init
@@ -69,15 +69,17 @@ xfs_init(void)
        /*
         * Initialize all of the zone allocators we use.
         */
+        xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                                "xfs_log_ticket");
        xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-                                                 "xfs_bmap_free_item");
+                                                "xfs_bmap_free_item");
        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-                                            "xfs_btree_cur");
+                                                "xfs_btree_cur");
-        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+        xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-        xfs_da_state_zone =
+                                                "xfs_da_state");
-                kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state");
        xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
        xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
        xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
        xfs_mru_cache_init();
        xfs_filestream_init();
@@ -113,9 +115,6 @@ xfs_init(void)
        xfs_ili_zone =
                kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
                                        KM_ZONE_SPREAD, NULL);
-        xfs_icluster_zone =
-                kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
-                                        KM_ZONE_SPREAD, NULL);
        /*
         * Allocate global trace buffers.
@@ -153,11 +152,9 @@ xfs_cleanup(void)
        extern kmem_zone_t      *xfs_inode_zone;
        extern kmem_zone_t      *xfs_efd_zone;
        extern kmem_zone_t      *xfs_efi_zone;
-        extern kmem_zone_t      *xfs_icluster_zone;
        xfs_cleanup_procfs();
        xfs_sysctl_unregister();
-        xfs_refcache_destroy();
        xfs_filestream_uninit();
        xfs_mru_cache_uninit();
        xfs_acl_zone_destroy(xfs_acl_zone);
@@ -189,7 +186,6 @@ xfs_cleanup(void)
        kmem_zone_destroy(xfs_efi_zone);
        kmem_zone_destroy(xfs_ifork_zone);
        kmem_zone_destroy(xfs_ili_zone);
-        kmem_zone_destroy(xfs_icluster_zone);
 }
 /*
@@ -573,7 +569,7 @@ xfs_unmount(
 #ifdef HAVE_DMAPI
        if (mp->m_flags & XFS_MOUNT_DMAPI) {
                error = XFS_SEND_PREUNMOUNT(mp,
-                                rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL,
+                                rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
                                NULL, NULL, 0, 0,
                                (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
                                        0:DM_FLAGS_UNWANTED);
@@ -584,11 +580,6 @@ xfs_unmount(
                                        0 : DM_FLAGS_UNWANTED;
        }
 #endif
-        /*
-         * First blow any referenced inode from this file system
-         * out of the reference cache, and delete the timer.
-         */
-        xfs_refcache_purge_mp(mp);
        /*
         * Blow away any referenced inode in the filestreams cache.
@@ -607,7 +598,7 @@ xfs_unmount(
        /*
         * Drop the reference count
         */
-        VN_RELE(rvp);
+        IRELE(rip);
        /*
         * If we're forcing a shutdown, typically because of a media error,
@@ -629,7 +620,7 @@ out:
                /* Note: mp structure must still exist for
                 * XFS_SEND_UNMOUNT() call.
                 */
-                XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL,
+                XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
                        DM_RIGHT_NULL, 0, error, unmount_event_flags);
        }
        if (xfs_unmountfs_needed) {
@@ -646,13 +637,12 @@ out:
        return XFS_ERROR(error);
 }
-STATIC int
+STATIC void
 xfs_quiesce_fs(
        xfs_mount_t             *mp)
 {
        int                     count = 0, pincount;
-        xfs_refcache_purge_mp(mp);
        xfs_flush_buftarg(mp->m_ddev_targp, 0);
        xfs_finish_reclaim_all(mp, 0);
@@ -671,8 +661,6 @@ xfs_quiesce_fs(
                        count++;
                }
        } while (count < 2);
-        return 0;
 }
 /*
@@ -684,6 +672,8 @@ void
 xfs_attr_quiesce(
        xfs_mount_t     *mp)
 {
+        int     error = 0;
        /* wait for all modifications to complete */
        while (atomic_read(&mp->m_active_trans) > 0)
                delay(100);
@@ -694,7 +684,11 @@ xfs_attr_quiesce(
        ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
        /* Push the superblock and write an unmount record */
-        xfs_log_sbcount(mp, 1);
+        error = xfs_log_sbcount(mp, 1);
+        if (error)
+                xfs_fs_cmn_err(CE_WARN, mp,
+                                "xfs_attr_quiesce: failed to log sb changes. "
+                                "Frozen image may not be consistent.");
        xfs_log_unmount_write(mp);
        xfs_unmountfs_writesb(mp);
 }
@@ -790,8 +784,8 @@ xfs_unmount_flush(
                goto fscorrupt_out2;
        if (rbmip) {
-                VN_RELE(XFS_ITOV(rbmip));
+                IRELE(rbmip);
-                VN_RELE(XFS_ITOV(rsumip));
+                IRELE(rsumip);
        }
        xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1169,10 +1163,10 @@ xfs_sync_inodes(
                         * above, then wait until after we've unlocked
                         * the inode to release the reference.  This is
                         * because we can be already holding the inode
-                         * lock when VN_RELE() calls xfs_inactive().
+                         * lock when IRELE() calls xfs_inactive().
                         *
                         * Make sure to drop the mount lock before calling
-                         * VN_RELE() so that we don't trip over ourselves if
+                         * IRELE() so that we don't trip over ourselves if
                         * we have to go for the mount lock again in the
                         * inactive code.
                         */
@@ -1180,7 +1174,7 @@ xfs_sync_inodes(
                                IPOINTER_INSERT(ip, mp);
                        }
-                        VN_RELE(vp);
+                        IRELE(ip);
                        vnode_refed = B_FALSE;
                }
@@ -1323,30 +1317,8 @@ xfs_syncsub(
        }
        /*
-         * If this is the periodic sync, then kick some entries out of
-         * the reference cache.  This ensures that idle entries are
-         * eventually kicked out of the cache.
-         */
-        if (flags & SYNC_REFCACHE) {
-                if (flags & SYNC_WAIT)
-                        xfs_refcache_purge_mp(mp);
-                else
-                        xfs_refcache_purge_some(mp);
-        }
-        /*
-         * If asked, update the disk superblock with incore counter values if we
-         * are using non-persistent counters so that they don't get too far out
-         * of sync if we crash or get a forced shutdown. We don't want to force
-         * this to disk, just get a transaction into the iclogs....
-         */
-        if (flags & SYNC_SUPER)
-                xfs_log_sbcount(mp, 0);
-        /*
         * Now check to see if the log needs a "dummy" transaction.
         */
        if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
                xfs_trans_t *tp;
                xfs_inode_t *ip;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 64c5953feca4..6650601c64f7 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -48,7 +48,6 @@
 #include "xfs_quota.h"
 #include "xfs_utils.h"
 #include "xfs_rtalloc.h"
-#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
 #include "xfs_filestream.h"
@@ -327,7 +326,7 @@ xfs_setattr(
                if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
                    !(flags & ATTR_DMI)) {
                        int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
-                        code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+                        code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
                                vap->va_size, 0, dmflags, NULL);
                        if (code) {
                                lock_flags = 0;
@@ -634,6 +633,15 @@ xfs_setattr(
         * Truncate file.  Must have write permission and not be a directory.
         */
        if (mask & XFS_AT_SIZE) {
+                /*
+                 * Only change the c/mtime if we are changing the size
+                 * or we are explicitly asked to change it. This handles
+                 * the semantic difference between truncate() and ftruncate()
+                 * as implemented in the VFS.
+                 */
+                if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+                        timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
                if (vap->va_size > ip->i_size) {
                        xfs_igrow_finish(tp, ip, vap->va_size,
                            !(flags & ATTR_DMI));
@@ -662,10 +670,6 @@ xfs_setattr(
                         */
                        xfs_iflags_set(ip, XFS_ITRUNCATED);
                }
-                /*
-                 * Have to do this even if the file's size doesn't change.
-                 */
-                timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
        }
        /*
@@ -877,7 +881,7 @@ xfs_setattr(
        if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
            !(flags & ATTR_DMI)) {
-                (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+                (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
                                        NULL, DM_RIGHT_NULL, NULL, NULL,
                                        0, 0, AT_DELAY_FLAG(flags));
        }
@@ -1443,28 +1447,22 @@ xfs_inactive_attrs(
        tp = *tpp;
        mp = ip->i_mount;
        ASSERT(ip->i_d.di_forkoff != 0);
-        xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        if (error)
+                goto error_unlock;
        error = xfs_attr_inactive(ip);
-        if (error) {
+        if (error)
-                *tpp = NULL;
+                goto error_unlock;
-                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                return error; /* goto out */
-        }
        tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
        error = xfs_trans_reserve(tp, 0,
                                  XFS_IFREE_LOG_RES(mp),
                                  0, XFS_TRANS_PERM_LOG_RES,
                                  XFS_INACTIVE_LOG_COUNT);
-        if (error) {
+        if (error)
-                ASSERT(XFS_FORCED_SHUTDOWN(mp));
+                goto error_cancel;
-                xfs_trans_cancel(tp, 0);
-                *tpp = NULL;
-                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                return error;
-        }
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
@@ -1475,6 +1473,14 @@ xfs_inactive_attrs(
        *tpp = tp;
        return 0;
+error_cancel:
+        ASSERT(XFS_FORCED_SHUTDOWN(mp));
+        xfs_trans_cancel(tp, 0);
+error_unlock:
+        *tpp = NULL;
+        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+        return error;
 }
 int
@@ -1520,12 +1526,6 @@ xfs_release(
                        xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
        }
-#ifdef HAVE_REFCACHE
-        /* If we are in the NFS reference cache then don't do this now */
-        if (ip->i_refcache)
-                return 0;
-#endif
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
                     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1588,9 +1588,8 @@ xfs_inactive(
        mp = ip->i_mount;
-        if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) {
+        if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
-                (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
+                XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
-        }
        error = 0;
@@ -1744,11 +1743,18 @@ xfs_inactive(
                XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
                /*
-                 * Just ignore errors at this point.  There is
+                 * Just ignore errors at this point.  There is nothing we can
-                 * nothing we can do except to try to keep going.
+                 * do except to try to keep going. Make sure it's not a silent
+                 * error.
                 */
-                (void) xfs_bmap_finish(&tp,  &free_list, &committed);
+                error = xfs_bmap_finish(&tp,  &free_list, &committed);
-                (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+                if (error)
+                        xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
+                                "xfs_bmap_finish() returned error %d", error);
+                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+                if (error)
+                        xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
+                                "xfs_trans_commit() returned error %d", error);
        }
        /*
         * Release the dquots held by inode, if any.
@@ -1765,8 +1771,8 @@ xfs_inactive(
 int
 xfs_lookup(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry,
+        struct xfs_name         *name,
-        bhv_vnode_t             **vpp)
+        xfs_inode_t             **ipp)
 {
        xfs_inode_t             *ip;
        xfs_ino_t               e_inum;
@@ -1779,9 +1785,9 @@ xfs_lookup(
                return XFS_ERROR(EIO);
        lock_mode = xfs_ilock_map_shared(dp);
-        error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip);
+        error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip);
        if (!error) {
-                *vpp = XFS_ITOV(ip);
+                *ipp = ip;
                xfs_itrace_ref(ip);
        }
        xfs_iunlock_map_shared(dp, lock_mode);
@@ -1791,19 +1797,16 @@ xfs_lookup(
 int
 xfs_create(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry,
+        struct xfs_name         *name,
        mode_t                  mode,
        xfs_dev_t               rdev,
-        bhv_vnode_t             **vpp,
+        xfs_inode_t             **ipp,
        cred_t                  *credp)
 {
-        char                    *name = VNAME(dentry);
+        xfs_mount_t             *mp = dp->i_mount;
-        xfs_mount_t             *mp = dp->i_mount;
-        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
        xfs_inode_t             *ip;
-        bhv_vnode_t             *vp = NULL;
        xfs_trans_t             *tp;
-        int                     error;
+        int                     error;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
        boolean_t               unlock_dp_on_error = B_FALSE;
@@ -1813,17 +1816,14 @@ xfs_create(
        xfs_prid_t              prid;
        struct xfs_dquot        *udqp, *gdqp;
        uint                    resblks;
-        int                     namelen;
-        ASSERT(!*vpp);
+        ASSERT(!*ipp);
        xfs_itrace_entry(dp);
-        namelen = VNAMELEN(dentry);
        if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-                                dir_vp, DM_RIGHT_NULL, NULL,
+                                dp, DM_RIGHT_NULL, NULL,
-                                DM_RIGHT_NULL, name, NULL,
+                                DM_RIGHT_NULL, name->name, NULL,
                                mode, 0, 0);
                if (error)
@@ -1855,7 +1855,7 @@ xfs_create(
        tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-        resblks = XFS_CREATE_SPACE_RES(mp, namelen);
+        resblks = XFS_CREATE_SPACE_RES(mp, name->len);
        /*
         * Initially assume that the file does not exist and
         * reserve the resources for that case.  If that is not
@@ -1888,7 +1888,8 @@ xfs_create(
        if (error)
                goto error_return;
-        if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
+        error = xfs_dir_canenter(tp, dp, name, resblks);
+        if (error)
                goto error_return;
        error = xfs_dir_ialloc(&tp, dp, mode, 1,
                        rdev, credp, prid, resblks > 0,
@@ -1914,11 +1915,11 @@ xfs_create(
         * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-        VN_HOLD(dir_vp);
+        IHOLD(dp);
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
        unlock_dp_on_error = B_FALSE;
-        error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
+        error = xfs_dir_createname(tp, dp, name, ip->i_ino,
                                        &first_block, &free_list, resblks ?
                                        resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
        if (error) {
@@ -1952,7 +1953,6 @@ xfs_create(
         * vnode to the caller, we bump the vnode ref count now.
         */
        IHOLD(ip);
-        vp = XFS_ITOV(ip);
        error = xfs_bmap_finish(&tp, &free_list, &committed);
        if (error) {
@@ -1970,17 +1970,17 @@ xfs_create(
        XFS_QM_DQRELE(mp, udqp);
        XFS_QM_DQRELE(mp, gdqp);
-        *vpp = vp;
+        *ipp = ip;
        /* Fallthrough to std_return with error = 0  */
 std_return:
-        if ((*vpp || (error != 0 && dm_event_sent != 0)) &&
+        if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
            DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-                        dir_vp, DM_RIGHT_NULL,
+                        dp, DM_RIGHT_NULL,
-                        *vpp ? vp:NULL,
+                        *ipp ? ip : NULL,
-                        DM_RIGHT_NULL, name, NULL,
+                        DM_RIGHT_NULL, name->name, NULL,
                        mode, error, 0);
        }
        return error;
@@ -2272,46 +2272,32 @@ int remove_which_error_return = 0;
 int
 xfs_remove(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry)
+        struct xfs_name         *name,
+        xfs_inode_t             *ip)
 {
-        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
-        char                    *name = VNAME(dentry);
        xfs_mount_t             *mp = dp->i_mount;
-        xfs_inode_t             *ip;
        xfs_trans_t             *tp = NULL;
        int                     error = 0;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
        int                     cancel_flags;
        int                     committed;
-        int                     dm_di_mode = 0;
        int                     link_zero;
        uint                    resblks;
-        int                     namelen;
        xfs_itrace_entry(dp);
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        namelen = VNAMELEN(dentry);
-        if (!xfs_get_dir_entry(dentry, &ip)) {
-                dm_di_mode = ip->i_d.di_mode;
-                IRELE(ip);
-        }
        if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
-                error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
+                error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
-                                        DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
+                                        NULL, DM_RIGHT_NULL, name->name, NULL,
-                                        name, NULL, dm_di_mode, 0, 0);
+                                        ip->i_d.di_mode, 0, 0);
                if (error)
                        return error;
        }
-        /* From this point on, return through std_return */
-        ip = NULL;
        /*
         * We need to get a reference to ip before we get our log
         * reservation. The reason for this is that we cannot call
@@ -2324,13 +2310,7 @@ xfs_remove(
         * when we call xfs_iget.  Instead we get an unlocked reference
         * to the inode before getting our log reservation.
         */
-        error = xfs_get_dir_entry(dentry, &ip);
+        IHOLD(ip);
-        if (error) {
-                REMOVE_DEBUG_TRACE(__LINE__);
-                goto std_return;
-        }
-        dm_di_mode = ip->i_d.di_mode;
        xfs_itrace_entry(ip);
        xfs_itrace_ref(ip);
@@ -2398,7 +2378,7 @@ xfs_remove(
         * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
         */
        XFS_BMAP_INIT(&free_list, &first_block);
-        error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino,
+        error = xfs_dir_removename(tp, dp, name, ip->i_ino,
                                        &first_block, &free_list, 0);
        if (error) {
                ASSERT(error != ENOENT);
@@ -2449,14 +2429,6 @@ xfs_remove(
        }
        /*
-         * Before we drop our extra reference to the inode, purge it
-         * from the refcache if it is there.  By waiting until afterwards
-         * to do the IRELE, we ensure that we won't go inactive in the
-         * xfs_refcache_purge_ip routine (although that would be OK).
-         */
-        xfs_refcache_purge_ip(ip);
-        /*
         * If we are using filestreams, kill the stream association.
         * If the file is still open it may get a new one but that
         * will get killed on last close in xfs_close() so we don't
@@ -2472,9 +2444,9 @@ xfs_remove(
 std_return:
        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-                                dir_vp, DM_RIGHT_NULL,
+                                dp, DM_RIGHT_NULL,
                                NULL, DM_RIGHT_NULL,
-                                name, NULL, dm_di_mode, error, 0);
+                                name->name, NULL, ip->i_d.di_mode, error, 0);
        }
        return error;
@@ -2495,14 +2467,6 @@ xfs_remove(
        cancel_flags |= XFS_TRANS_ABORT;
        xfs_trans_cancel(tp, cancel_flags);
-        /*
-         * Before we drop our extra reference to the inode, purge it
-         * from the refcache if it is there.  By waiting until afterwards
-         * to do the IRELE, we ensure that we won't go inactive in the
-         * xfs_refcache_purge_ip routine (although that would be OK).
-         */
-        xfs_refcache_purge_ip(ip);
        IRELE(ip);
        goto std_return;
@@ -2511,12 +2475,10 @@ xfs_remove(
 int
 xfs_link(
        xfs_inode_t             *tdp,
-        bhv_vnode_t             *src_vp,
+        xfs_inode_t             *sip,
-        bhv_vname_t             *dentry)
+        struct xfs_name         *target_name)
 {
-        bhv_vnode_t             *target_dir_vp = XFS_ITOV(tdp);
        xfs_mount_t             *mp = tdp->i_mount;
-        xfs_inode_t             *sip = xfs_vtoi(src_vp);
        xfs_trans_t             *tp;
        xfs_inode_t             *ips[2];
        int                     error;
@@ -2525,23 +2487,20 @@ xfs_link(
        int                     cancel_flags;
        int                     committed;
        int                     resblks;
-        char                    *target_name = VNAME(dentry);
-        int                     target_namelen;
        xfs_itrace_entry(tdp);
-        xfs_itrace_entry(xfs_vtoi(src_vp));
+        xfs_itrace_entry(sip);
-        target_namelen = VNAMELEN(dentry);
+        ASSERT(!S_ISDIR(sip->i_d.di_mode));
-        ASSERT(!VN_ISDIR(src_vp));
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
        if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
-                                        target_dir_vp, DM_RIGHT_NULL,
+                                        tdp, DM_RIGHT_NULL,
-                                        src_vp, DM_RIGHT_NULL,
+                                        sip, DM_RIGHT_NULL,
-                                        target_name, NULL, 0, 0, 0);
+                                        target_name->name, NULL, 0, 0, 0);
                if (error)
                        return error;
        }
@@ -2556,7 +2515,7 @@ xfs_link(
        tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-        resblks = XFS_LINK_SPACE_RES(mp, target_namelen);
+        resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
        error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
                        XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
        if (error == ENOSPC) {
@@ -2584,8 +2543,8 @@ xfs_link(
         * xfs_trans_cancel will both unlock the inodes and
         * decrement the associated ref counts.
         */
-        VN_HOLD(src_vp);
+        IHOLD(sip);
-        VN_HOLD(target_dir_vp);
+        IHOLD(tdp);
        xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
@@ -2608,15 +2567,14 @@ xfs_link(
                goto error_return;
        }
-        if (resblks == 0 &&
+        error = xfs_dir_canenter(tp, tdp, target_name, resblks);
-            (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen)))
+        if (error)
                goto error_return;
        XFS_BMAP_INIT(&free_list, &first_block);
-        error = xfs_dir_createname(tp, tdp, target_name, target_namelen,
+        error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-                                   sip->i_ino, &first_block, &free_list,
+                                        &first_block, &free_list, resblks);
-                                   resblks);
        if (error)
                goto abort_return;
        xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2650,9 +2608,9 @@ xfs_link(
 std_return:
        if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
-                                target_dir_vp, DM_RIGHT_NULL,
+                                tdp, DM_RIGHT_NULL,
-                                src_vp, DM_RIGHT_NULL,
+                                sip, DM_RIGHT_NULL,
-                                target_name, NULL, 0, error, 0);
+                                target_name->name, NULL, 0, error, 0);
        }
        return error;
@@ -2669,17 +2627,13 @@ std_return:
 int
 xfs_mkdir(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry,
+        struct xfs_name         *dir_name,
        mode_t                  mode,
-        bhv_vnode_t             **vpp,
+        xfs_inode_t             **ipp,
        cred_t                  *credp)
 {
-        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
-        char                    *dir_name = VNAME(dentry);
-        int                     dir_namelen = VNAMELEN(dentry);
        xfs_mount_t             *mp = dp->i_mount;
        xfs_inode_t             *cdp;   /* inode of created dir */
-        bhv_vnode_t             *cvp;   /* vnode of created dir */
        xfs_trans_t             *tp;
        int                     cancel_flags;
        int                     error;
@@ -2700,8 +2654,8 @@ xfs_mkdir(
        if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-                                        dir_vp, DM_RIGHT_NULL, NULL,
+                                        dp, DM_RIGHT_NULL, NULL,
-                                        DM_RIGHT_NULL, dir_name, NULL,
+                                        DM_RIGHT_NULL, dir_name->name, NULL,
                                        mode, 0, 0);
                if (error)
                        return error;
@@ -2730,7 +2684,7 @@ xfs_mkdir(
        tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-        resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen);
+        resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len);
        error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
                                  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
        if (error == ENOSPC) {
@@ -2762,8 +2716,8 @@ xfs_mkdir(
        if (error)
                goto error_return;
-        if (resblks == 0 &&
+        error = xfs_dir_canenter(tp, dp, dir_name, resblks);
-            (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen)))
+        if (error)
                goto error_return;
        /*
         * create the directory inode.
@@ -2786,15 +2740,15 @@ xfs_mkdir(
         * from here on will result in the transaction cancel
         * unlocking dp so don't do it explicitly in the error path.
         */
-        VN_HOLD(dir_vp);
+        IHOLD(dp);
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
        unlock_dp_on_error = B_FALSE;
        XFS_BMAP_INIT(&free_list, &first_block);
-        error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino,
+        error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino,
-                                   &first_block, &free_list, resblks ?
+                                        &first_block, &free_list, resblks ?
-                                   resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+                                        resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
        if (error) {
                ASSERT(error != ENOSPC);
                goto error1;
@@ -2817,11 +2771,9 @@ xfs_mkdir(
        if (error)
                goto error2;
-        cvp = XFS_ITOV(cdp);
        created = B_TRUE;
-        *vpp = cvp;
+        *ipp = cdp;
        IHOLD(cdp);
        /*
@@ -2858,10 +2810,10 @@ std_return:
        if ((created || (error != 0 && dm_event_sent != 0)) &&
            DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-                                        dir_vp, DM_RIGHT_NULL,
+                                        dp, DM_RIGHT_NULL,
-                                        created ? XFS_ITOV(cdp):NULL,
+                                        created ? cdp : NULL,
                                        DM_RIGHT_NULL,
-                                        dir_name, NULL,
+                                        dir_name->name, NULL,
                                        mode, error, 0);
        }
        return error;
@@ -2885,20 +2837,17 @@ std_return:
 int
 xfs_rmdir(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry)
+        struct xfs_name         *name,
+        xfs_inode_t             *cdp)
 {
        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
-        char                    *name = VNAME(dentry);
-        int                     namelen = VNAMELEN(dentry);
        xfs_mount_t             *mp = dp->i_mount;
-        xfs_inode_t             *cdp;   /* child directory */
        xfs_trans_t             *tp;
        int                     error;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
        int                     cancel_flags;
        int                     committed;
-        int                     dm_di_mode = S_IFDIR;
        int                     last_cdp_link;
        uint                    resblks;
@@ -2907,24 +2856,15 @@ xfs_rmdir(
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        if (!xfs_get_dir_entry(dentry, &cdp)) {
-                dm_di_mode = cdp->i_d.di_mode;
-                IRELE(cdp);
-        }
        if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
-                                        dir_vp, DM_RIGHT_NULL,
+                                        dp, DM_RIGHT_NULL,
-                                        NULL, DM_RIGHT_NULL,
+                                        NULL, DM_RIGHT_NULL, name->name,
-                                        name, NULL, dm_di_mode, 0, 0);
+                                        NULL, cdp->i_d.di_mode, 0, 0);
                if (error)
                        return XFS_ERROR(error);
        }
-        /* Return through std_return after this point. */
-        cdp = NULL;
        /*
         * We need to get a reference to cdp before we get our log
         * reservation.  The reason for this is that we cannot call
@@ -2937,13 +2877,7 @@ xfs_rmdir(
         * when we call xfs_iget.  Instead we get an unlocked reference
         * to the inode before getting our log reservation.
         */
-        error = xfs_get_dir_entry(dentry, &cdp);
+        IHOLD(cdp);
-        if (error) {
-                REMOVE_DEBUG_TRACE(__LINE__);
-                goto std_return;
-        }
-        mp = dp->i_mount;
-        dm_di_mode = cdp->i_d.di_mode;
        /*
         * Get the dquots for the inodes.
@@ -3020,7 +2954,7 @@ xfs_rmdir(
                goto error_return;
        }
-        error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino,
+        error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
                                        &first_block, &free_list, resblks);
        if (error)
                goto error1;
@@ -3098,9 +3032,9 @@ xfs_rmdir(
 std_return:
        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-                                        dir_vp, DM_RIGHT_NULL,
+                                        dp, DM_RIGHT_NULL,
                                        NULL, DM_RIGHT_NULL,
-                                        name, NULL, dm_di_mode,
+                                        name->name, NULL, cdp->i_d.di_mode,
                                        error, 0);
        }
        return error;
@@ -3118,13 +3052,12 @@ xfs_rmdir(
 int
 xfs_symlink(
        xfs_inode_t             *dp,
-        bhv_vname_t             *dentry,
+        struct xfs_name         *link_name,
-        char                    *target_path,
+        const char              *target_path,
        mode_t                  mode,
-        bhv_vnode_t             **vpp,
+        xfs_inode_t             **ipp,
        cred_t                  *credp)
 {
-        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp;
        xfs_inode_t             *ip;
@@ -3140,17 +3073,15 @@ xfs_symlink(
        int                     nmaps;
        xfs_bmbt_irec_t         mval[SYMLINK_MAPS];
        xfs_daddr_t             d;
-        char                    *cur_chunk;
+        const char              *cur_chunk;
        int                     byte_cnt;
        int                     n;
        xfs_buf_t               *bp;
        xfs_prid_t              prid;
        struct xfs_dquot        *udqp, *gdqp;
        uint                    resblks;
-        char                    *link_name = VNAME(dentry);
-        int                     link_namelen;
-        *vpp = NULL;
+        *ipp = NULL;
        error = 0;
        ip = NULL;
        tp = NULL;
@@ -3160,44 +3091,17 @@ xfs_symlink(
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        link_namelen = VNAMELEN(dentry);
        /*
         * Check component lengths of the target path name.
         */
        pathlen = strlen(target_path);
        if (pathlen >= MAXPATHLEN)      /* total string too long */
                return XFS_ERROR(ENAMETOOLONG);
-        if (pathlen >= MAXNAMELEN) {    /* is any component too long? */
-                int len, total;
-                char *path;
-                for (total = 0, path = target_path; total < pathlen;) {
-                        /*
-                         * Skip any slashes.
-                         */
-                        while(*path == '/') {
-                                total++;
-                                path++;
-                        }
-                        /*
-                         * Count up to the next slash or end of path.
-                         * Error out if the component is bigger than MAXNAMELEN.
-                         */
-                        for(len = 0; *path != '/' && total < pathlen;total++, path++) {
-                                if (++len >= MAXNAMELEN) {
-                                        error = ENAMETOOLONG;
-                                        return error;
-                                }
-                        }
-                }
-        }
        if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
-                error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
+                error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
                                        DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
-                                        link_name, target_path, 0, 0, 0);
+                                        link_name->name, target_path, 0, 0, 0);
                if (error)
                        return error;
        }
@@ -3229,7 +3133,7 @@ xfs_symlink(
                fs_blocks = 0;
        else
                fs_blocks = XFS_B_TO_FSB(mp, pathlen);
-        resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks);
+        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
        error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
                        XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
        if (error == ENOSPC && fs_blocks == 0) {
@@ -3263,8 +3167,8 @@ xfs_symlink(
        /*
         * Check for ability to enter directory entry, if no space reserved.
         */
-        if (resblks == 0 &&
+        error = xfs_dir_canenter(tp, dp, link_name, resblks);
-            (error = xfs_dir_canenter(tp, dp, link_name, link_namelen)))
+        if (error)
                goto error_return;
        /*
         * Initialize the bmap freelist prior to calling either
@@ -3289,7 +3193,7 @@ xfs_symlink(
         * transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-        VN_HOLD(dir_vp);
+        IHOLD(dp);
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
        unlock_dp_on_error = B_FALSE;
@@ -3356,8 +3260,8 @@ xfs_symlink(
        /*
         * Create the directory entry for the symlink.
         */
-        error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino,
+        error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
-                                   &first_block, &free_list, resblks);
+                                        &first_block, &free_list, resblks);
        if (error)
                goto error1;
        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3399,19 +3303,14 @@ xfs_symlink(
 std_return:
        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
-                                        dir_vp, DM_RIGHT_NULL,
+                                        dp, DM_RIGHT_NULL,
-                                        error ? NULL : XFS_ITOV(ip),
+                                        error ? NULL : ip,
-                                        DM_RIGHT_NULL, link_name, target_path,
+                                        DM_RIGHT_NULL, link_name->name,
-                                        0, error, 0);
+                                        target_path, 0, error, 0);
        }
-        if (!error) {
+        if (!error)
-                bhv_vnode_t *vp;
+                *ipp = ip;
-                ASSERT(ip);
-                vp = XFS_ITOV(ip);
-                *vpp = vp;
-        }
        return error;
 error2:
@@ -3431,60 +3330,11 @@ std_return:
 }
 int
-xfs_rwlock(
-        xfs_inode_t     *ip,
-        bhv_vrwlock_t   locktype)
-{
-        if (S_ISDIR(ip->i_d.di_mode))
-                return 1;
-        if (locktype == VRWLOCK_WRITE) {
-                xfs_ilock(ip, XFS_IOLOCK_EXCL);
-        } else if (locktype == VRWLOCK_TRY_READ) {
-                return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
-        } else if (locktype == VRWLOCK_TRY_WRITE) {
-                return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
-        } else {
-                ASSERT((locktype == VRWLOCK_READ) ||
-                       (locktype == VRWLOCK_WRITE_DIRECT));
-                xfs_ilock(ip, XFS_IOLOCK_SHARED);
-        }
-        return 1;
-}
-void
-xfs_rwunlock(
-        xfs_inode_t     *ip,
-        bhv_vrwlock_t   locktype)
-{
-        if (S_ISDIR(ip->i_d.di_mode))
-                return;
-        if (locktype == VRWLOCK_WRITE) {
-                /*
-                 * In the write case, we may have added a new entry to
-                 * the reference cache.  This might store a pointer to
-                 * an inode to be released in this inode.  If it is there,
-                 * clear the pointer and release the inode after unlocking
-                 * this one.
-                 */
-                xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
-        } else {
-                ASSERT((locktype == VRWLOCK_READ) ||
-                       (locktype == VRWLOCK_WRITE_DIRECT));
-                xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        }
-        return;
-}
-int
 xfs_inode_flush(
        xfs_inode_t     *ip,
        int             flags)
 {
        xfs_mount_t     *mp = ip->i_mount;
-        xfs_inode_log_item_t *iip = ip->i_itemp;
        int             error = 0;
        if (XFS_FORCED_SHUTDOWN(mp))
@@ -3494,33 +3344,9 @@ xfs_inode_flush(
         * Bypass inodes which have already been cleaned by
         * the inode flush clustering code inside xfs_iflush
         */
-        if ((ip->i_update_core == 0) &&
+        if (xfs_inode_clean(ip))
-            ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
                return 0;
-        if (flags & FLUSH_LOG) {
-                if (iip && iip->ili_last_lsn) {
-                        xlog_t          *log = mp->m_log;
-                        xfs_lsn_t       sync_lsn;
-                        int             log_flags = XFS_LOG_FORCE;
-                        spin_lock(&log->l_grant_lock);
-                        sync_lsn = log->l_last_sync_lsn;
-                        spin_unlock(&log->l_grant_lock);
-                        if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
-                                if (flags & FLUSH_SYNC)
-                                        log_flags |= XFS_LOG_SYNC;
-                                error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
-                                if (error)
-                                        return error;
-                        }
-                        if (ip->i_update_core == 0)
-                                return 0;
-                }
-        }
        /*
         * We make this non-blocking if the inode is contended,
         * return EAGAIN to indicate to the caller that they
@@ -3528,30 +3354,22 @@ xfs_inode_flush(
         * blocking on inodes inside another operation right
         * now, they get caught later by xfs_sync.
         */
-        if (flags & FLUSH_INODE) {
+        if (flags & FLUSH_SYNC) {
-                int     flush_flags;
+                xfs_ilock(ip, XFS_ILOCK_SHARED);
+                xfs_iflock(ip);
-                if (flags & FLUSH_SYNC) {
+        } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-                        xfs_ilock(ip, XFS_ILOCK_SHARED);
+                if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-                        xfs_iflock(ip);
+                        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-                        if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-                                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                                return EAGAIN;
-                        }
-                } else {
                        return EAGAIN;
                }
+        } else {
-                if (flags & FLUSH_SYNC)
+                return EAGAIN;
-                        flush_flags = XFS_IFLUSH_SYNC;
-                else
-                        flush_flags = XFS_IFLUSH_ASYNC;
-                error = xfs_iflush(ip, flush_flags);
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
        }
+        error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+                                                    : XFS_IFLUSH_ASYNC_NOBLOCK);
+        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return error;
 }
@@ -3694,12 +3512,12 @@ xfs_finish_reclaim(
         * We get the flush lock regardless, though, just to make sure
         * we don't free it while it is being flushed.
         */
-        if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+        if (!locked) {
-                if (!locked) {
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
-                        xfs_ilock(ip, XFS_ILOCK_EXCL);
+                xfs_iflock(ip);
-                        xfs_iflock(ip);
+        }
-                }
+        if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                if (ip->i_update_core ||
                    ((ip->i_itemp != NULL) &&
                     (ip->i_itemp->ili_format.ilf_fields != 0))) {
@@ -3719,17 +3537,11 @@ xfs_finish_reclaim(
                ASSERT(ip->i_update_core == 0);
                ASSERT(ip->i_itemp == NULL ||
                       ip->i_itemp->ili_format.ilf_fields == 0);
-                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-        } else if (locked) {
-                /*
-                 * We are not interested in doing an iflush if we're
-                 * in the process of shutting down the filesystem forcibly.
-                 * So, just reclaim the inode.
-                 */
-                xfs_ifunlock(ip);
-                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
+        xfs_ifunlock(ip);
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 reclaim:
        xfs_ireclaim(ip);
        return 0;
@@ -3845,9 +3657,8 @@ xfs_alloc_file_space(
                end_dmi_offset = offset+len;
                if (end_dmi_offset > ip->i_size)
                        end_dmi_offset = ip->i_size;
-                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
+                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
-                        offset, end_dmi_offset - offset,
+                                      end_dmi_offset - offset, 0, NULL);
-                        0, NULL);
                if (error)
                        return error;
        }
@@ -3956,8 +3767,8 @@ dmapi_enospc_check:
        if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
            DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
-                                XFS_ITOV(ip), DM_RIGHT_NULL,
+                                ip, DM_RIGHT_NULL,
-                                XFS_ITOV(ip), DM_RIGHT_NULL,
+                                ip, DM_RIGHT_NULL,
                                NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
                if (error == 0)
                        goto retry;     /* Maybe DMAPI app. has made space */
@@ -4021,7 +3832,8 @@ xfs_zero_remaining_bytes(
                XFS_BUF_READ(bp);
                XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock));
                xfsbdstrat(mp, bp);
-                if ((error = xfs_iowait(bp))) {
+                error = xfs_iowait(bp);
+                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
                                          mp, bp, XFS_BUF_ADDR(bp));
                        break;
@@ -4033,7 +3845,8 @@ xfs_zero_remaining_bytes(
                XFS_BUF_UNREAD(bp);
                XFS_BUF_WRITE(bp);
                xfsbdstrat(mp, bp);
-                if ((error = xfs_iowait(bp))) {
+                error = xfs_iowait(bp);
+                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
                                          mp, bp, XFS_BUF_ADDR(bp));
                        break;
@@ -4102,7 +3915,7 @@ xfs_free_file_space(
            DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
                if (end_dmi_offset > ip->i_size)
                        end_dmi_offset = ip->i_size;
-                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
+                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
                                offset, end_dmi_offset - offset,
                                AT_DELAY_FLAG(attr_flags), NULL);
                if (error)
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 4e3970f0e5e3..24c53923dc2c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -23,31 +23,32 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
                xfs_off_t stop);
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
-int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry,
+int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
-                bhv_vnode_t **vpp);
+                struct xfs_inode **ipp);
-int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode,
+int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
-                xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp);
+                xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
-int xfs_remove(struct xfs_inode *dp, bhv_vname_t        *dentry);
+int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
-int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp,
+                struct xfs_inode *ip);
-                bhv_vname_t *dentry);
+int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
-int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry,
+                struct xfs_name *target_name);
-                mode_t mode, bhv_vnode_t **vpp, struct cred *credp);
+int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
-int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry);
+                mode_t mode, struct xfs_inode **ipp, struct cred *credp);
+int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
+                struct xfs_inode *cdp);
 int xfs_readdir(struct xfs_inode        *dp, void *dirent, size_t bufsize,
                       xfs_off_t *offset, filldir_t filldir);
-int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
+int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-                char *target_path, mode_t mode, bhv_vnode_t **vpp,
+                const char *target_path, mode_t mode, struct xfs_inode **ipp,
                struct cred *credp);
-int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
-void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
                xfs_flock64_t *bf, xfs_off_t offset,
                struct cred *credp, int attr_flags);
-int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname,
+int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
-                bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname);
+                struct xfs_inode *src_ip, struct xfs_inode *target_dp,
+                struct xfs_name *target_name);
 int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
                int *valuelenp, int flags, cred_t *cred);
 int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-18 11:39:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-18 11:39:39 -0400
commit	253ba4e79edc695b2925bd2ef34de06ff4d4070c (patch)
tree	259667140ca702d6a218cc54f4df275fbbda747b
parent	188da98800893691e47eea9335a234378e32aceb (diff)
parent	65e67f5165c8a156b34ee7adf65d5ed3b16a910d (diff)