aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c649
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c303
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h179
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c35
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c77
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c215
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c521
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h133
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h29
-rw-r--r--fs/xfs/quota/xfs_dquot.c276
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c228
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c12
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c133
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_ag.h9
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c38
-rw-r--r--fs/xfs/xfs_attr.c128
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c383
-rw-r--r--fs/xfs/xfs_bmap.h44
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c61
-rw-r--r--fs/xfs/xfs_btree.h14
-rw-r--r--fs/xfs/xfs_buf_item.c235
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c22
-rw-r--r--fs/xfs/xfs_dfrag.c16
-rw-r--r--fs/xfs/xfs_dinode.h5
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c6
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fs.h11
-rw-r--r--fs/xfs/xfs_fsops.c50
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_ialloc.c22
-rw-r--r--fs/xfs/xfs_ialloc_btree.c37
-rw-r--r--fs/xfs/xfs_iget.c112
-rw-r--r--fs/xfs/xfs_inode.c126
-rw-r--r--fs/xfs/xfs_inode.h36
-rw-r--r--fs/xfs/xfs_inode_item.c282
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c11
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c481
-rw-r--r--fs/xfs/xfs_log_priv.h50
-rw-r--r--fs/xfs/xfs_log_recover.c67
-rw-r--r--fs/xfs/xfs_mount.c311
-rw-r--r--fs/xfs/xfs_mount.h78
-rw-r--r--fs/xfs/xfs_refcache.h52
-rw-r--r--fs/xfs/xfs_rename.c77
-rw-r--r--fs/xfs/xfs_rtalloc.c38
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_sb.h10
-rw-r--r--fs/xfs/xfs_trans.c305
-rw-r--r--fs/xfs/xfs_trans.h120
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c77
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c98
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h17
-rw-r--r--fs/xfs/xfs_types.h2
-rw-r--r--fs/xfs/xfs_utils.c96
-rw-r--r--fs/xfs/xfs_utils.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c409
-rw-r--r--fs/xfs/xfs_vnodeops.h6
102 files changed, 3152 insertions, 5656 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
22config XFS_QUOTA 22config XFS_QUOTA
23 bool "XFS Quota support" 23 bool "XFS Quota support"
24 depends on XFS_FS 24 depends on XFS_FS
25 select QUOTACTL
25 help 26 help
26 If you say Y here, you will be able to set limits for disk usage on 27 If you say Y here, you will be able to set limits for disk usage on
27 a per user and/or a per group basis under XFS. XFS considers quota 28 a per user and/or a per group basis under XFS. XFS considers quota
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c8fb13f83b3f..0dce969d6cad 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \
87 xfs_trans_buf.o \ 87 xfs_trans_buf.o \
88 xfs_trans_extfree.o \ 88 xfs_trans_extfree.o \
89 xfs_trans_inode.o \ 89 xfs_trans_inode.o \
90 xfs_trans_item.o \
91 xfs_utils.o \ 90 xfs_utils.o \
92 xfs_vnodeops.o \ 91 xfs_vnodeops.o \
93 xfs_rw.o \ 92 xfs_rw.o
94 xfs_dmops.o
95 93
96xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o 94xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
97 95
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 9f769b5b38fc..b2771862fd3d 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask)
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 xfs_itrace_entry(ip); 228 trace_xfs_check_acl(ip);
229 229
230 /* 230 /*
231 * If there is no attribute fork no ACL exists on this inode and 231 * If there is no attribute fork no ACL exists on this inode and
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdcb..c9af48fffcd7 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@
21#include "xfs_inum.h" 21#include "xfs_inum.h"
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_dir2.h"
25#include "xfs_trans.h" 24#include "xfs_trans.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 25#include "xfs_mount.h"
28#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
29#include "xfs_alloc_btree.h"
30#include "xfs_ialloc_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 27#include "xfs_dinode.h"
34#include "xfs_inode.h" 28#include "xfs_inode.h"
35#include "xfs_alloc.h" 29#include "xfs_alloc.h"
36#include "xfs_btree.h"
37#include "xfs_error.h" 30#include "xfs_error.h"
38#include "xfs_rw.h" 31#include "xfs_rw.h"
39#include "xfs_iomap.h" 32#include "xfs_iomap.h"
@@ -92,18 +85,15 @@ void
92xfs_count_page_state( 85xfs_count_page_state(
93 struct page *page, 86 struct page *page,
94 int *delalloc, 87 int *delalloc,
95 int *unmapped,
96 int *unwritten) 88 int *unwritten)
97{ 89{
98 struct buffer_head *bh, *head; 90 struct buffer_head *bh, *head;
99 91
100 *delalloc = *unmapped = *unwritten = 0; 92 *delalloc = *unwritten = 0;
101 93
102 bh = head = page_buffers(page); 94 bh = head = page_buffers(page);
103 do { 95 do {
104 if (buffer_uptodate(bh) && !buffer_mapped(bh)) 96 if (buffer_unwritten(bh))
105 (*unmapped) = 1;
106 else if (buffer_unwritten(bh))
107 (*unwritten) = 1; 97 (*unwritten) = 1;
108 else if (buffer_delay(bh)) 98 else if (buffer_delay(bh))
109 (*delalloc) = 1; 99 (*delalloc) = 1;
@@ -212,23 +202,17 @@ xfs_setfilesize(
212} 202}
213 203
214/* 204/*
215 * Schedule IO completion handling on a xfsdatad if this was 205 * Schedule IO completion handling on the final put of an ioend.
216 * the final hold on this ioend. If we are asked to wait,
217 * flush the workqueue.
218 */ 206 */
219STATIC void 207STATIC void
220xfs_finish_ioend( 208xfs_finish_ioend(
221 xfs_ioend_t *ioend, 209 struct xfs_ioend *ioend)
222 int wait)
223{ 210{
224 if (atomic_dec_and_test(&ioend->io_remaining)) { 211 if (atomic_dec_and_test(&ioend->io_remaining)) {
225 struct workqueue_struct *wq; 212 if (ioend->io_type == IO_UNWRITTEN)
226 213 queue_work(xfsconvertd_workqueue, &ioend->io_work);
227 wq = (ioend->io_type == IO_UNWRITTEN) ? 214 else
228 xfsconvertd_workqueue : xfsdatad_workqueue; 215 queue_work(xfsdatad_workqueue, &ioend->io_work);
229 queue_work(wq, &ioend->io_work);
230 if (wait)
231 flush_workqueue(wq);
232 } 216 }
233} 217}
234 218
@@ -272,11 +256,25 @@ xfs_end_io(
272 */ 256 */
273 if (error == EAGAIN) { 257 if (error == EAGAIN) {
274 atomic_inc(&ioend->io_remaining); 258 atomic_inc(&ioend->io_remaining);
275 xfs_finish_ioend(ioend, 0); 259 xfs_finish_ioend(ioend);
276 /* ensure we don't spin on blocked ioends */ 260 /* ensure we don't spin on blocked ioends */
277 delay(1); 261 delay(1);
278 } else 262 } else {
263 if (ioend->io_iocb)
264 aio_complete(ioend->io_iocb, ioend->io_result, 0);
279 xfs_destroy_ioend(ioend); 265 xfs_destroy_ioend(ioend);
266 }
267}
268
269/*
270 * Call IO completion handling in caller context on the final put of an ioend.
271 */
272STATIC void
273xfs_finish_ioend_sync(
274 struct xfs_ioend *ioend)
275{
276 if (atomic_dec_and_test(&ioend->io_remaining))
277 xfs_end_io(&ioend->io_work);
280} 278}
281 279
282/* 280/*
@@ -309,6 +307,8 @@ xfs_alloc_ioend(
309 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); 307 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
310 ioend->io_offset = 0; 308 ioend->io_offset = 0;
311 ioend->io_size = 0; 309 ioend->io_size = 0;
310 ioend->io_iocb = NULL;
311 ioend->io_result = 0;
312 312
313 INIT_WORK(&ioend->io_work, xfs_end_io); 313 INIT_WORK(&ioend->io_work, xfs_end_io);
314 return ioend; 314 return ioend;
@@ -358,7 +358,7 @@ xfs_end_bio(
358 bio->bi_end_io = NULL; 358 bio->bi_end_io = NULL;
359 bio_put(bio); 359 bio_put(bio);
360 360
361 xfs_finish_ioend(ioend, 0); 361 xfs_finish_ioend(ioend);
362} 362}
363 363
364STATIC void 364STATIC void
@@ -500,7 +500,7 @@ xfs_submit_ioend(
500 } 500 }
501 if (bio) 501 if (bio)
502 xfs_submit_ioend_bio(wbc, ioend, bio); 502 xfs_submit_ioend_bio(wbc, ioend, bio);
503 xfs_finish_ioend(ioend, 0); 503 xfs_finish_ioend(ioend);
504 } while ((ioend = next) != NULL); 504 } while ((ioend = next) != NULL);
505} 505}
506 506
@@ -614,31 +614,30 @@ xfs_map_at_offset(
614STATIC unsigned int 614STATIC unsigned int
615xfs_probe_page( 615xfs_probe_page(
616 struct page *page, 616 struct page *page,
617 unsigned int pg_offset, 617 unsigned int pg_offset)
618 int mapped)
619{ 618{
619 struct buffer_head *bh, *head;
620 int ret = 0; 620 int ret = 0;
621 621
622 if (PageWriteback(page)) 622 if (PageWriteback(page))
623 return 0; 623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
624 630
625 if (page->mapping && PageDirty(page)) { 631 bh = head = page_buffers(page);
626 if (page_has_buffers(page)) { 632 do {
627 struct buffer_head *bh, *head; 633 if (!buffer_uptodate(bh))
628 634 break;
629 bh = head = page_buffers(page); 635 if (!buffer_mapped(bh))
630 do { 636 break;
631 if (!buffer_uptodate(bh)) 637 ret += bh->b_size;
632 break; 638 if (ret >= pg_offset)
633 if (mapped != buffer_mapped(bh)) 639 break;
634 break; 640 } while ((bh = bh->b_this_page) != head);
635 ret += bh->b_size;
636 if (ret >= pg_offset)
637 break;
638 } while ((bh = bh->b_this_page) != head);
639 } else
640 ret = mapped ? 0 : PAGE_CACHE_SIZE;
641 }
642 641
643 return ret; 642 return ret;
644} 643}
@@ -648,8 +647,7 @@ xfs_probe_cluster(
648 struct inode *inode, 647 struct inode *inode,
649 struct page *startpage, 648 struct page *startpage,
650 struct buffer_head *bh, 649 struct buffer_head *bh,
651 struct buffer_head *head, 650 struct buffer_head *head)
652 int mapped)
653{ 651{
654 struct pagevec pvec; 652 struct pagevec pvec;
655 pgoff_t tindex, tlast, tloff; 653 pgoff_t tindex, tlast, tloff;
@@ -658,7 +656,7 @@ xfs_probe_cluster(
658 656
659 /* First sum forwards in this page */ 657 /* First sum forwards in this page */
660 do { 658 do {
661 if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) 659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
662 return total; 660 return total;
663 total += bh->b_size; 661 total += bh->b_size;
664 } while ((bh = bh->b_this_page) != head); 662 } while ((bh = bh->b_this_page) != head);
@@ -692,7 +690,7 @@ xfs_probe_cluster(
692 pg_offset = PAGE_CACHE_SIZE; 690 pg_offset = PAGE_CACHE_SIZE;
693 691
694 if (page->index == tindex && trylock_page(page)) { 692 if (page->index == tindex && trylock_page(page)) {
695 pg_len = xfs_probe_page(page, pg_offset, mapped); 693 pg_len = xfs_probe_page(page, pg_offset);
696 unlock_page(page); 694 unlock_page(page);
697 } 695 }
698 696
@@ -761,7 +759,6 @@ xfs_convert_page(
761 struct xfs_bmbt_irec *imap, 759 struct xfs_bmbt_irec *imap,
762 xfs_ioend_t **ioendp, 760 xfs_ioend_t **ioendp,
763 struct writeback_control *wbc, 761 struct writeback_control *wbc,
764 int startio,
765 int all_bh) 762 int all_bh)
766{ 763{
767 struct buffer_head *bh, *head; 764 struct buffer_head *bh, *head;
@@ -832,19 +829,14 @@ xfs_convert_page(
832 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
833 830
834 xfs_map_at_offset(inode, bh, imap, offset); 831 xfs_map_at_offset(inode, bh, imap, offset);
835 if (startio) { 832 xfs_add_to_ioend(inode, bh, offset, type,
836 xfs_add_to_ioend(inode, bh, offset, 833 ioendp, done);
837 type, ioendp, done); 834
838 } else {
839 set_buffer_dirty(bh);
840 unlock_buffer(bh);
841 mark_buffer_dirty(bh);
842 }
843 page_dirty--; 835 page_dirty--;
844 count++; 836 count++;
845 } else { 837 } else {
846 type = IO_NEW; 838 type = IO_NEW;
847 if (buffer_mapped(bh) && all_bh && startio) { 839 if (buffer_mapped(bh) && all_bh) {
848 lock_buffer(bh); 840 lock_buffer(bh);
849 xfs_add_to_ioend(inode, bh, offset, 841 xfs_add_to_ioend(inode, bh, offset,
850 type, ioendp, done); 842 type, ioendp, done);
@@ -859,14 +851,12 @@ xfs_convert_page(
859 if (uptodate && bh == head) 851 if (uptodate && bh == head)
860 SetPageUptodate(page); 852 SetPageUptodate(page);
861 853
862 if (startio) { 854 if (count) {
863 if (count) { 855 if (--wbc->nr_to_write <= 0 &&
864 wbc->nr_to_write--; 856 wbc->sync_mode == WB_SYNC_NONE)
865 if (wbc->nr_to_write <= 0) 857 done = 1;
866 done = 1;
867 }
868 xfs_start_page_writeback(page, !page_dirty, count);
869 } 858 }
859 xfs_start_page_writeback(page, !page_dirty, count);
870 860
871 return done; 861 return done;
872 fail_unlock_page: 862 fail_unlock_page:
@@ -886,7 +876,6 @@ xfs_cluster_write(
886 struct xfs_bmbt_irec *imap, 876 struct xfs_bmbt_irec *imap,
887 xfs_ioend_t **ioendp, 877 xfs_ioend_t **ioendp,
888 struct writeback_control *wbc, 878 struct writeback_control *wbc,
889 int startio,
890 int all_bh, 879 int all_bh,
891 pgoff_t tlast) 880 pgoff_t tlast)
892{ 881{
@@ -902,7 +891,7 @@ xfs_cluster_write(
902 891
903 for (i = 0; i < pagevec_count(&pvec); i++) { 892 for (i = 0; i < pagevec_count(&pvec); i++) {
904 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 893 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
905 imap, ioendp, wbc, startio, all_bh); 894 imap, ioendp, wbc, all_bh);
906 if (done) 895 if (done)
907 break; 896 break;
908 } 897 }
@@ -981,7 +970,7 @@ xfs_aops_discard_page(
981 */ 970 */
982 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 971 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
983 XFS_BMAPI_ENTIRE, NULL, 0, &imap, 972 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
984 &nimaps, NULL, NULL); 973 &nimaps, NULL);
985 974
986 if (error) { 975 if (error) {
987 /* something screwed, just bail */ 976 /* something screwed, just bail */
@@ -1009,7 +998,7 @@ xfs_aops_discard_page(
1009 */ 998 */
1010 xfs_bmap_init(&flist, &firstblock); 999 xfs_bmap_init(&flist, &firstblock);
1011 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, 1000 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
1012 &flist, NULL, &done); 1001 &flist, &done);
1013 1002
1014 ASSERT(!flist.xbf_count && !flist.xbf_first); 1003 ASSERT(!flist.xbf_count && !flist.xbf_first);
1015 if (error) { 1004 if (error) {
@@ -1032,50 +1021,66 @@ out_invalidate:
1032} 1021}
1033 1022
1034/* 1023/*
1035 * Calling this without startio set means we are being asked to make a dirty 1024 * Write out a dirty page.
1036 * page ready for freeing it's buffers. When called with startio set then 1025 *
1037 * we are coming from writepage. 1026 * For delalloc space on the page we need to allocate space and flush it.
1027 * For unwritten space on the page we need to start the conversion to
1028 * regular allocated space.
1029 * For any other dirty buffer heads on the page we should flush them.
1038 * 1030 *
1039 * When called with startio set it is important that we write the WHOLE 1031 * If we detect that a transaction would be required to flush the page, we
1040 * page if possible. 1032 * have to check the process flags first, if we are already in a transaction
1041 * The bh->b_state's cannot know if any of the blocks or which block for 1033 * or disk I/O during allocations is off, we need to fail the writepage and
1042 * that matter are dirty due to mmap writes, and therefore bh uptodate is 1034 * redirty the page.
1043 * only valid if the page itself isn't completely uptodate. Some layers
1044 * may clear the page dirty flag prior to calling write page, under the
1045 * assumption the entire page will be written out; by not writing out the
1046 * whole page the page can be reused before all valid dirty data is
1047 * written out. Note: in the case of a page that has been dirty'd by
1048 * mapwrite and but partially setup by block_prepare_write the
1049 * bh->b_states's will not agree and only ones setup by BPW/BCW will have
1050 * valid state, thus the whole page must be written out thing.
1051 */ 1035 */
1052
1053STATIC int 1036STATIC int
1054xfs_page_state_convert( 1037xfs_vm_writepage(
1055 struct inode *inode, 1038 struct page *page,
1056 struct page *page, 1039 struct writeback_control *wbc)
1057 struct writeback_control *wbc,
1058 int startio,
1059 int unmapped) /* also implies page uptodate */
1060{ 1040{
1041 struct inode *inode = page->mapping->host;
1042 int delalloc, unwritten;
1061 struct buffer_head *bh, *head; 1043 struct buffer_head *bh, *head;
1062 struct xfs_bmbt_irec imap; 1044 struct xfs_bmbt_irec imap;
1063 xfs_ioend_t *ioend = NULL, *iohead = NULL; 1045 xfs_ioend_t *ioend = NULL, *iohead = NULL;
1064 loff_t offset; 1046 loff_t offset;
1065 unsigned long p_offset = 0;
1066 unsigned int type; 1047 unsigned int type;
1067 __uint64_t end_offset; 1048 __uint64_t end_offset;
1068 pgoff_t end_index, last_index; 1049 pgoff_t end_index, last_index;
1069 ssize_t size, len; 1050 ssize_t size, len;
1070 int flags, err, imap_valid = 0, uptodate = 1; 1051 int flags, err, imap_valid = 0, uptodate = 1;
1071 int page_dirty, count = 0; 1052 int count = 0;
1072 int trylock = 0; 1053 int all_bh = 0;
1073 int all_bh = unmapped;
1074 1054
1075 if (startio) { 1055 trace_xfs_writepage(inode, page, 0);
1076 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) 1056
1077 trylock |= BMAPI_TRYLOCK; 1057 ASSERT(page_has_buffers(page));
1078 } 1058
1059 /*
1060 * Refuse to write the page out if we are called from reclaim context.
1061 *
1062 * This avoids stack overflows when called from deeply used stacks in
1063 * random callers for direct reclaim or memcg reclaim. We explicitly
1064 * allow reclaim from kswapd as the stack usage there is relatively low.
1065 *
1066 * This should really be done by the core VM, but until that happens
1067 * filesystems like XFS, btrfs and ext4 have to take care of this
1068 * by themselves.
1069 */
1070 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
1071 goto redirty;
1072
1073 /*
1074 * We need a transaction if there are delalloc or unwritten buffers
1075 * on the page.
1076 *
1077 * If we need a transaction and the process flags say we are already
1078 * in a transaction, or no IO is allowed then mark the page dirty
1079 * again and leave the page as is.
1080 */
1081 xfs_count_page_state(page, &delalloc, &unwritten);
1082 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
1083 goto redirty;
1079 1084
1080 /* Is this page beyond the end of the file? */ 1085 /* Is this page beyond the end of the file? */
1081 offset = i_size_read(inode); 1086 offset = i_size_read(inode);
@@ -1084,50 +1089,33 @@ xfs_page_state_convert(
1084 if (page->index >= end_index) { 1089 if (page->index >= end_index) {
1085 if ((page->index >= end_index + 1) || 1090 if ((page->index >= end_index + 1) ||
1086 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 1091 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
1087 if (startio) 1092 unlock_page(page);
1088 unlock_page(page);
1089 return 0; 1093 return 0;
1090 } 1094 }
1091 } 1095 }
1092 1096
1093 /*
1094 * page_dirty is initially a count of buffers on the page before
1095 * EOF and is decremented as we move each into a cleanable state.
1096 *
1097 * Derivation:
1098 *
1099 * End offset is the highest offset that this page should represent.
1100 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
1101 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
1102 * hence give us the correct page_dirty count. On any other page,
1103 * it will be zero and in that case we need page_dirty to be the
1104 * count of buffers on the page.
1105 */
1106 end_offset = min_t(unsigned long long, 1097 end_offset = min_t(unsigned long long,
1107 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); 1098 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
1099 offset);
1108 len = 1 << inode->i_blkbits; 1100 len = 1 << inode->i_blkbits;
1109 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
1110 PAGE_CACHE_SIZE);
1111 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
1112 page_dirty = p_offset / len;
1113 1101
1114 bh = head = page_buffers(page); 1102 bh = head = page_buffers(page);
1115 offset = page_offset(page); 1103 offset = page_offset(page);
1116 flags = BMAPI_READ; 1104 flags = BMAPI_READ;
1117 type = IO_NEW; 1105 type = IO_NEW;
1118 1106
1119 /* TODO: cleanup count and page_dirty */
1120
1121 do { 1107 do {
1122 if (offset >= end_offset) 1108 if (offset >= end_offset)
1123 break; 1109 break;
1124 if (!buffer_uptodate(bh)) 1110 if (!buffer_uptodate(bh))
1125 uptodate = 0; 1111 uptodate = 0;
1126 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { 1112
1127 /* 1113 /*
1128 * the iomap is actually still valid, but the ioend 1114 * A hole may still be marked uptodate because discard_buffer
1129 * isn't. shouldn't happen too often. 1115 * leaves the flag set.
1130 */ 1116 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1131 imap_valid = 0; 1119 imap_valid = 0;
1132 continue; 1120 continue;
1133 } 1121 }
@@ -1135,19 +1123,7 @@ xfs_page_state_convert(
1135 if (imap_valid) 1123 if (imap_valid)
1136 imap_valid = xfs_imap_valid(inode, &imap, offset); 1124 imap_valid = xfs_imap_valid(inode, &imap, offset);
1137 1125
1138 /* 1126 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1139 * First case, map an unwritten extent and prepare for
1140 * extent state conversion transaction on completion.
1141 *
1142 * Second case, allocate space for a delalloc buffer.
1143 * We can return EAGAIN here in the release page case.
1144 *
1145 * Third case, an unmapped buffer was found, and we are
1146 * in a path where we need to write the whole page out.
1147 */
1148 if (buffer_unwritten(bh) || buffer_delay(bh) ||
1149 ((buffer_uptodate(bh) || PageUptodate(page)) &&
1150 !buffer_mapped(bh) && (unmapped || startio))) {
1151 int new_ioend = 0; 1127 int new_ioend = 0;
1152 1128
1153 /* 1129 /*
@@ -1161,15 +1137,15 @@ xfs_page_state_convert(
1161 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 1137 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
1162 } else if (buffer_delay(bh)) { 1138 } else if (buffer_delay(bh)) {
1163 type = IO_DELAY; 1139 type = IO_DELAY;
1164 flags = BMAPI_ALLOCATE | trylock; 1140 flags = BMAPI_ALLOCATE;
1165 } else { 1141
1166 type = IO_NEW; 1142 if (wbc->sync_mode == WB_SYNC_NONE)
1167 flags = BMAPI_WRITE | BMAPI_MMAP; 1143 flags |= BMAPI_TRYLOCK;
1168 } 1144 }
1169 1145
1170 if (!imap_valid) { 1146 if (!imap_valid) {
1171 /* 1147 /*
1172 * if we didn't have a valid mapping then we 1148 * If we didn't have a valid mapping then we
1173 * need to ensure that we put the new mapping 1149 * need to ensure that we put the new mapping
1174 * in a new ioend structure. This needs to be 1150 * in a new ioend structure. This needs to be
1175 * done to ensure that the ioends correctly 1151 * done to ensure that the ioends correctly
@@ -1177,14 +1153,7 @@ xfs_page_state_convert(
1177 * for unwritten extent conversion. 1153 * for unwritten extent conversion.
1178 */ 1154 */
1179 new_ioend = 1; 1155 new_ioend = 1;
1180 if (type == IO_NEW) { 1156 err = xfs_map_blocks(inode, offset, len,
1181 size = xfs_probe_cluster(inode,
1182 page, bh, head, 0);
1183 } else {
1184 size = len;
1185 }
1186
1187 err = xfs_map_blocks(inode, offset, size,
1188 &imap, flags); 1157 &imap, flags);
1189 if (err) 1158 if (err)
1190 goto error; 1159 goto error;
@@ -1193,19 +1162,11 @@ xfs_page_state_convert(
1193 } 1162 }
1194 if (imap_valid) { 1163 if (imap_valid) {
1195 xfs_map_at_offset(inode, bh, &imap, offset); 1164 xfs_map_at_offset(inode, bh, &imap, offset);
1196 if (startio) { 1165 xfs_add_to_ioend(inode, bh, offset, type,
1197 xfs_add_to_ioend(inode, bh, offset, 1166 &ioend, new_ioend);
1198 type, &ioend,
1199 new_ioend);
1200 } else {
1201 set_buffer_dirty(bh);
1202 unlock_buffer(bh);
1203 mark_buffer_dirty(bh);
1204 }
1205 page_dirty--;
1206 count++; 1167 count++;
1207 } 1168 }
1208 } else if (buffer_uptodate(bh) && startio) { 1169 } else if (buffer_uptodate(bh)) {
1209 /* 1170 /*
1210 * we got here because the buffer is already mapped. 1171 * we got here because the buffer is already mapped.
1211 * That means it must already have extents allocated 1172 * That means it must already have extents allocated
@@ -1213,8 +1174,7 @@ xfs_page_state_convert(
1213 */ 1174 */
1214 if (!imap_valid || flags != BMAPI_READ) { 1175 if (!imap_valid || flags != BMAPI_READ) {
1215 flags = BMAPI_READ; 1176 flags = BMAPI_READ;
1216 size = xfs_probe_cluster(inode, page, bh, 1177 size = xfs_probe_cluster(inode, page, bh, head);
1217 head, 1);
1218 err = xfs_map_blocks(inode, offset, size, 1178 err = xfs_map_blocks(inode, offset, size,
1219 &imap, flags); 1179 &imap, flags);
1220 if (err) 1180 if (err)
@@ -1233,18 +1193,16 @@ xfs_page_state_convert(
1233 */ 1193 */
1234 type = IO_NEW; 1194 type = IO_NEW;
1235 if (trylock_buffer(bh)) { 1195 if (trylock_buffer(bh)) {
1236 ASSERT(buffer_mapped(bh));
1237 if (imap_valid) 1196 if (imap_valid)
1238 all_bh = 1; 1197 all_bh = 1;
1239 xfs_add_to_ioend(inode, bh, offset, type, 1198 xfs_add_to_ioend(inode, bh, offset, type,
1240 &ioend, !imap_valid); 1199 &ioend, !imap_valid);
1241 page_dirty--;
1242 count++; 1200 count++;
1243 } else { 1201 } else {
1244 imap_valid = 0; 1202 imap_valid = 0;
1245 } 1203 }
1246 } else if ((buffer_uptodate(bh) || PageUptodate(page)) && 1204 } else if (PageUptodate(page)) {
1247 (unmapped || startio)) { 1205 ASSERT(buffer_mapped(bh));
1248 imap_valid = 0; 1206 imap_valid = 0;
1249 } 1207 }
1250 1208
@@ -1256,8 +1214,7 @@ xfs_page_state_convert(
1256 if (uptodate && bh == head) 1214 if (uptodate && bh == head)
1257 SetPageUptodate(page); 1215 SetPageUptodate(page);
1258 1216
1259 if (startio) 1217 xfs_start_page_writeback(page, 1, count);
1260 xfs_start_page_writeback(page, 1, count);
1261 1218
1262 if (ioend && imap_valid) { 1219 if (ioend && imap_valid) {
1263 xfs_off_t end_index; 1220 xfs_off_t end_index;
@@ -1275,131 +1232,30 @@ xfs_page_state_convert(
1275 end_index = last_index; 1232 end_index = last_index;
1276 1233
1277 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1234 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1278 wbc, startio, all_bh, end_index); 1235 wbc, all_bh, end_index);
1279 } 1236 }
1280 1237
1281 if (iohead) 1238 if (iohead)
1282 xfs_submit_ioend(wbc, iohead); 1239 xfs_submit_ioend(wbc, iohead);
1283 1240
1284 return page_dirty; 1241 return 0;
1285 1242
1286error: 1243error:
1287 if (iohead) 1244 if (iohead)
1288 xfs_cancel_ioend(iohead); 1245 xfs_cancel_ioend(iohead);
1289 1246
1290 /* 1247 if (err == -EAGAIN)
1291 * If it's delalloc and we have nowhere to put it, 1248 goto redirty;
1292 * throw it away, unless the lower layers told
1293 * us to try again.
1294 */
1295 if (err != -EAGAIN) {
1296 if (!unmapped)
1297 xfs_aops_discard_page(page);
1298 ClearPageUptodate(page);
1299 }
1300 return err;
1301}
1302 1249
1303/* 1250 xfs_aops_discard_page(page);
1304 * writepage: Called from one of two places: 1251 ClearPageUptodate(page);
1305 * 1252 unlock_page(page);
1306 * 1. we are flushing a delalloc buffer head. 1253 return err;
1307 *
1308 * 2. we are writing out a dirty page. Typically the page dirty
1309 * state is cleared before we get here. In this case is it
1310 * conceivable we have no buffer heads.
1311 *
1312 * For delalloc space on the page we need to allocate space and
1313 * flush it. For unmapped buffer heads on the page we should
1314 * allocate space if the page is uptodate. For any other dirty
1315 * buffer heads on the page we should flush them.
1316 *
1317 * If we detect that a transaction would be required to flush
1318 * the page, we have to check the process flags first, if we
1319 * are already in a transaction or disk I/O during allocations
1320 * is off, we need to fail the writepage and redirty the page.
1321 */
1322
1323STATIC int
1324xfs_vm_writepage(
1325 struct page *page,
1326 struct writeback_control *wbc)
1327{
1328 int error;
1329 int need_trans;
1330 int delalloc, unmapped, unwritten;
1331 struct inode *inode = page->mapping->host;
1332
1333 trace_xfs_writepage(inode, page, 0);
1334
1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called from deep
1339 * used stacks in random callers for direct reclaim, but disabling
1340 * reclaim for kswap is a nice side-effect as kswapd causes rather
1341 * suboptimal I/O patters, too.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1351 * We need a transaction if:
1352 * 1. There are delalloc buffers on the page
1353 * 2. The page is uptodate and we have unmapped buffers
1354 * 3. The page is uptodate and we have no buffers
1355 * 4. There are unwritten buffers on the page
1356 */
1357
1358 if (!page_has_buffers(page)) {
1359 unmapped = 1;
1360 need_trans = 1;
1361 } else {
1362 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1363 if (!PageUptodate(page))
1364 unmapped = 0;
1365 need_trans = delalloc + unmapped + unwritten;
1366 }
1367
1368 /*
1369 * If we need a transaction and the process flags say
1370 * we are already in a transaction, or no IO is allowed
1371 * then mark the page dirty again and leave the page
1372 * as is.
1373 */
1374 if (current_test_flags(PF_FSTRANS) && need_trans)
1375 goto out_fail;
1376
1377 /*
1378 * Delay hooking up buffer heads until we have
1379 * made our go/no-go decision.
1380 */
1381 if (!page_has_buffers(page))
1382 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1383
1384 /*
1385 * Convert delayed allocate, unwritten or unmapped space
1386 * to real space and flush out to disk.
1387 */
1388 error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
1389 if (error == -EAGAIN)
1390 goto out_fail;
1391 if (unlikely(error < 0))
1392 goto out_unlock;
1393
1394 return 0;
1395 1254
1396out_fail: 1255redirty:
1397 redirty_page_for_writepage(wbc, page); 1256 redirty_page_for_writepage(wbc, page);
1398 unlock_page(page); 1257 unlock_page(page);
1399 return 0; 1258 return 0;
1400out_unlock:
1401 unlock_page(page);
1402 return error;
1403} 1259}
1404 1260
1405STATIC int 1261STATIC int
@@ -1413,65 +1269,27 @@ xfs_vm_writepages(
1413 1269
1414/* 1270/*
1415 * Called to move a page into cleanable state - and from there 1271 * Called to move a page into cleanable state - and from there
1416 * to be released. Possibly the page is already clean. We always 1272 * to be released. The page should already be clean. We always
1417 * have buffer heads in this call. 1273 * have buffer heads in this call.
1418 * 1274 *
1419 * Returns 0 if the page is ok to release, 1 otherwise. 1275 * Returns 1 if the page is ok to release, 0 otherwise.
1420 *
1421 * Possible scenarios are:
1422 *
1423 * 1. We are being called to release a page which has been written
1424 * to via regular I/O. buffer heads will be dirty and possibly
1425 * delalloc. If no delalloc buffer heads in this case then we
1426 * can just return zero.
1427 *
1428 * 2. We are called to release a page which has been written via
1429 * mmap, all we need to do is ensure there is no delalloc
1430 * state in the buffer heads, if not we can let the caller
1431 * free them and we should come back later via writepage.
1432 */ 1276 */
1433STATIC int 1277STATIC int
1434xfs_vm_releasepage( 1278xfs_vm_releasepage(
1435 struct page *page, 1279 struct page *page,
1436 gfp_t gfp_mask) 1280 gfp_t gfp_mask)
1437{ 1281{
1438 struct inode *inode = page->mapping->host; 1282 int delalloc, unwritten;
1439 int dirty, delalloc, unmapped, unwritten;
1440 struct writeback_control wbc = {
1441 .sync_mode = WB_SYNC_ALL,
1442 .nr_to_write = 1,
1443 };
1444 1283
1445 trace_xfs_releasepage(inode, page, 0); 1284 trace_xfs_releasepage(page->mapping->host, page, 0);
1446 1285
1447 if (!page_has_buffers(page)) 1286 xfs_count_page_state(page, &delalloc, &unwritten);
1448 return 0;
1449
1450 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1451 if (!delalloc && !unwritten)
1452 goto free_buffers;
1453 1287
1454 if (!(gfp_mask & __GFP_FS)) 1288 if (WARN_ON(delalloc))
1455 return 0; 1289 return 0;
1456 1290 if (WARN_ON(unwritten))
1457 /* If we are already inside a transaction or the thread cannot
1458 * do I/O, we cannot release this page.
1459 */
1460 if (current_test_flags(PF_FSTRANS))
1461 return 0; 1291 return 0;
1462 1292
1463 /*
1464 * Convert delalloc space to real space, do not flush the
1465 * data out to disk, that will be done by the caller.
1466 * Never need to allocate space here - we will always
1467 * come back to writepage in that case.
1468 */
1469 dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
1470 if (dirty == 0 && !unwritten)
1471 goto free_buffers;
1472 return 0;
1473
1474free_buffers:
1475 return try_to_free_buffers(page); 1293 return try_to_free_buffers(page);
1476} 1294}
1477 1295
@@ -1481,9 +1299,9 @@ __xfs_get_blocks(
1481 sector_t iblock, 1299 sector_t iblock,
1482 struct buffer_head *bh_result, 1300 struct buffer_head *bh_result,
1483 int create, 1301 int create,
1484 int direct, 1302 int direct)
1485 bmapi_flags_t flags)
1486{ 1303{
1304 int flags = create ? BMAPI_WRITE : BMAPI_READ;
1487 struct xfs_bmbt_irec imap; 1305 struct xfs_bmbt_irec imap;
1488 xfs_off_t offset; 1306 xfs_off_t offset;
1489 ssize_t size; 1307 ssize_t size;
@@ -1498,8 +1316,11 @@ __xfs_get_blocks(
1498 if (!create && direct && offset >= i_size_read(inode)) 1316 if (!create && direct && offset >= i_size_read(inode))
1499 return 0; 1317 return 0;
1500 1318
1501 error = xfs_iomap(XFS_I(inode), offset, size, 1319 if (direct && create)
1502 create ? flags : BMAPI_READ, &imap, &nimap, &new); 1320 flags |= BMAPI_DIRECT;
1321
1322 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
1323 &new);
1503 if (error) 1324 if (error)
1504 return -error; 1325 return -error;
1505 if (nimap == 0) 1326 if (nimap == 0)
@@ -1579,8 +1400,7 @@ xfs_get_blocks(
1579 struct buffer_head *bh_result, 1400 struct buffer_head *bh_result,
1580 int create) 1401 int create)
1581{ 1402{
1582 return __xfs_get_blocks(inode, iblock, 1403 return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
1583 bh_result, create, 0, BMAPI_WRITE);
1584} 1404}
1585 1405
1586STATIC int 1406STATIC int
@@ -1590,61 +1410,59 @@ xfs_get_blocks_direct(
1590 struct buffer_head *bh_result, 1410 struct buffer_head *bh_result,
1591 int create) 1411 int create)
1592{ 1412{
1593 return __xfs_get_blocks(inode, iblock, 1413 return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
1594 bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1595} 1414}
1596 1415
1416/*
1417 * Complete a direct I/O write request.
1418 *
1419 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1420 * need to issue a transaction to convert the range from unwritten to written
1421 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1422 * to do this and we are done. But in case this was a successfull AIO
1423 * request this handler is called from interrupt context, from which we
1424 * can't start transactions. In that case offload the I/O completion to
1425 * the workqueues we also use for buffered I/O completion.
1426 */
1597STATIC void 1427STATIC void
1598xfs_end_io_direct( 1428xfs_end_io_direct_write(
1599 struct kiocb *iocb, 1429 struct kiocb *iocb,
1600 loff_t offset, 1430 loff_t offset,
1601 ssize_t size, 1431 ssize_t size,
1602 void *private) 1432 void *private,
1433 int ret,
1434 bool is_async)
1603{ 1435{
1604 xfs_ioend_t *ioend = iocb->private; 1436 struct xfs_ioend *ioend = iocb->private;
1605 1437
1606 /* 1438 /*
1607 * Non-NULL private data means we need to issue a transaction to 1439 * blockdev_direct_IO can return an error even after the I/O
1608 * convert a range from unwritten to written extents. This needs 1440 * completion handler was called. Thus we need to protect
1609 * to happen from process context but aio+dio I/O completion 1441 * against double-freeing.
1610 * happens from irq context so we need to defer it to a workqueue.
1611 * This is not necessary for synchronous direct I/O, but we do
1612 * it anyway to keep the code uniform and simpler.
1613 *
1614 * Well, if only it were that simple. Because synchronous direct I/O
1615 * requires extent conversion to occur *before* we return to userspace,
1616 * we have to wait for extent conversion to complete. Look at the
1617 * iocb that has been passed to us to determine if this is AIO or
1618 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
1619 * workqueue and wait for it to complete.
1620 *
1621 * The core direct I/O code might be changed to always call the
1622 * completion handler in the future, in which case all this can
1623 * go away.
1624 */ 1442 */
1443 iocb->private = NULL;
1444
1625 ioend->io_offset = offset; 1445 ioend->io_offset = offset;
1626 ioend->io_size = size; 1446 ioend->io_size = size;
1627 if (ioend->io_type == IO_READ) { 1447 if (private && size > 0)
1628 xfs_finish_ioend(ioend, 0); 1448 ioend->io_type = IO_UNWRITTEN;
1629 } else if (private && size > 0) { 1449
1630 xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); 1450 if (is_async) {
1631 } else {
1632 /* 1451 /*
1633 * A direct I/O write ioend starts it's life in unwritten 1452 * If we are converting an unwritten extent we need to delay
1634 * state in case they map an unwritten extent. This write 1453 * the AIO completion until after the unwrittent extent
1635 * didn't map an unwritten extent so switch it's completion 1454 * conversion has completed, otherwise do it ASAP.
1636 * handler.
1637 */ 1455 */
1638 ioend->io_type = IO_NEW; 1456 if (ioend->io_type == IO_UNWRITTEN) {
1639 xfs_finish_ioend(ioend, 0); 1457 ioend->io_iocb = iocb;
1458 ioend->io_result = ret;
1459 } else {
1460 aio_complete(iocb, ret, 0);
1461 }
1462 xfs_finish_ioend(ioend);
1463 } else {
1464 xfs_finish_ioend_sync(ioend);
1640 } 1465 }
1641
1642 /*
1643 * blockdev_direct_IO can return an error even after the I/O
1644 * completion handler was called. Thus we need to protect
1645 * against double-freeing.
1646 */
1647 iocb->private = NULL;
1648} 1466}
1649 1467
1650STATIC ssize_t 1468STATIC ssize_t
@@ -1655,26 +1473,45 @@ xfs_vm_direct_IO(
1655 loff_t offset, 1473 loff_t offset,
1656 unsigned long nr_segs) 1474 unsigned long nr_segs)
1657{ 1475{
1658 struct file *file = iocb->ki_filp; 1476 struct inode *inode = iocb->ki_filp->f_mapping->host;
1659 struct inode *inode = file->f_mapping->host; 1477 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1660 struct block_device *bdev; 1478 ssize_t ret;
1661 ssize_t ret;
1662
1663 bdev = xfs_find_bdev_for_inode(inode);
1664 1479
1665 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 1480 if (rw & WRITE) {
1666 IO_UNWRITTEN : IO_READ); 1481 iocb->private = xfs_alloc_ioend(inode, IO_NEW);
1667 1482
1668 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1483 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1669 offset, nr_segs, 1484 offset, nr_segs,
1670 xfs_get_blocks_direct, 1485 xfs_get_blocks_direct,
1671 xfs_end_io_direct); 1486 xfs_end_io_direct_write, NULL, 0);
1487 if (ret != -EIOCBQUEUED && iocb->private)
1488 xfs_destroy_ioend(iocb->private);
1489 } else {
1490 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1491 offset, nr_segs,
1492 xfs_get_blocks_direct,
1493 NULL, NULL, 0);
1494 }
1672 1495
1673 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1674 xfs_destroy_ioend(iocb->private);
1675 return ret; 1496 return ret;
1676} 1497}
1677 1498
1499STATIC void
1500xfs_vm_write_failed(
1501 struct address_space *mapping,
1502 loff_t to)
1503{
1504 struct inode *inode = mapping->host;
1505
1506 if (to > inode->i_size) {
1507 struct iattr ia = {
1508 .ia_valid = ATTR_SIZE | ATTR_FORCE,
1509 .ia_size = inode->i_size,
1510 };
1511 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
1512 }
1513}
1514
1678STATIC int 1515STATIC int
1679xfs_vm_write_begin( 1516xfs_vm_write_begin(
1680 struct file *file, 1517 struct file *file,
@@ -1685,9 +1522,31 @@ xfs_vm_write_begin(
1685 struct page **pagep, 1522 struct page **pagep,
1686 void **fsdata) 1523 void **fsdata)
1687{ 1524{
1688 *pagep = NULL; 1525 int ret;
1689 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1526
1690 xfs_get_blocks); 1527 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1528 pagep, xfs_get_blocks);
1529 if (unlikely(ret))
1530 xfs_vm_write_failed(mapping, pos + len);
1531 return ret;
1532}
1533
1534STATIC int
1535xfs_vm_write_end(
1536 struct file *file,
1537 struct address_space *mapping,
1538 loff_t pos,
1539 unsigned len,
1540 unsigned copied,
1541 struct page *page,
1542 void *fsdata)
1543{
1544 int ret;
1545
1546 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1547 if (unlikely(ret < len))
1548 xfs_vm_write_failed(mapping, pos + len);
1549 return ret;
1691} 1550}
1692 1551
1693STATIC sector_t 1552STATIC sector_t
@@ -1698,7 +1557,7 @@ xfs_vm_bmap(
1698 struct inode *inode = (struct inode *)mapping->host; 1557 struct inode *inode = (struct inode *)mapping->host;
1699 struct xfs_inode *ip = XFS_I(inode); 1558 struct xfs_inode *ip = XFS_I(inode);
1700 1559
1701 xfs_itrace_entry(XFS_I(inode)); 1560 trace_xfs_vm_bmap(XFS_I(inode));
1702 xfs_ilock(ip, XFS_IOLOCK_SHARED); 1561 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1703 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1562 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1704 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1563 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -1732,7 +1591,7 @@ const struct address_space_operations xfs_address_space_operations = {
1732 .releasepage = xfs_vm_releasepage, 1591 .releasepage = xfs_vm_releasepage,
1733 .invalidatepage = xfs_vm_invalidatepage, 1592 .invalidatepage = xfs_vm_invalidatepage,
1734 .write_begin = xfs_vm_write_begin, 1593 .write_begin = xfs_vm_write_begin,
1735 .write_end = generic_write_end, 1594 .write_end = xfs_vm_write_end,
1736 .bmap = xfs_vm_bmap, 1595 .bmap = xfs_vm_bmap,
1737 .direct_IO = xfs_vm_direct_IO, 1596 .direct_IO = xfs_vm_direct_IO,
1738 .migratepage = buffer_migrate_page, 1597 .migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df8..c5057fb6237a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
37 size_t io_size; /* size of the extent */ 37 size_t io_size; /* size of the extent */
38 xfs_off_t io_offset; /* offset in the file */ 38 xfs_off_t io_offset; /* offset in the file */
39 struct work_struct io_work; /* xfsdatad work queue */ 39 struct work_struct io_work; /* xfsdatad work queue */
40 struct kiocb *io_iocb;
41 int io_result;
40} xfs_ioend_t; 42} xfs_ioend_t;
41 43
42extern const struct address_space_operations xfs_address_space_operations; 44extern const struct address_space_operations xfs_address_space_operations;
@@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45extern void xfs_ioend_init(void); 47extern void xfs_ioend_init(void);
46extern void xfs_ioend_wait(struct xfs_inode *); 48extern void xfs_ioend_wait(struct xfs_inode *);
47 49
48extern void xfs_count_page_state(struct page *, int *, int *, int *); 50extern void xfs_count_page_state(struct page *, int *, int *);
49 51
50#endif /* __XFS_AOPS_H__ */ 52#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2ee3f7a60163..63fd2c07cb57 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,7 +39,6 @@
39#include "xfs_inum.h" 39#include "xfs_inum.h"
40#include "xfs_log.h" 40#include "xfs_log.h"
41#include "xfs_ag.h" 41#include "xfs_ag.h"
42#include "xfs_dmapi.h"
43#include "xfs_mount.h" 42#include "xfs_mount.h"
44#include "xfs_trace.h" 43#include "xfs_trace.h"
45 44
@@ -189,8 +188,8 @@ _xfs_buf_initialize(
189 atomic_set(&bp->b_hold, 1); 188 atomic_set(&bp->b_hold, 1);
190 init_completion(&bp->b_iowait); 189 init_completion(&bp->b_iowait);
191 INIT_LIST_HEAD(&bp->b_list); 190 INIT_LIST_HEAD(&bp->b_list);
192 INIT_LIST_HEAD(&bp->b_hash_list); 191 RB_CLEAR_NODE(&bp->b_rbnode);
193 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ 192 sema_init(&bp->b_sema, 0); /* held, no waiters */
194 XB_SET_OWNER(bp); 193 XB_SET_OWNER(bp);
195 bp->b_target = target; 194 bp->b_target = target;
196 bp->b_file_offset = range_base; 195 bp->b_file_offset = range_base;
@@ -263,8 +262,6 @@ xfs_buf_free(
263{ 262{
264 trace_xfs_buf_free(bp, _RET_IP_); 263 trace_xfs_buf_free(bp, _RET_IP_);
265 264
266 ASSERT(list_empty(&bp->b_hash_list));
267
268 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 265 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
269 uint i; 266 uint i;
270 267
@@ -423,8 +420,10 @@ _xfs_buf_find(
423{ 420{
424 xfs_off_t range_base; 421 xfs_off_t range_base;
425 size_t range_length; 422 size_t range_length;
426 xfs_bufhash_t *hash; 423 struct xfs_perag *pag;
427 xfs_buf_t *bp, *n; 424 struct rb_node **rbp;
425 struct rb_node *parent;
426 xfs_buf_t *bp;
428 427
429 range_base = (ioff << BBSHIFT); 428 range_base = (ioff << BBSHIFT);
430 range_length = (isize << BBSHIFT); 429 range_length = (isize << BBSHIFT);
@@ -433,20 +432,38 @@ _xfs_buf_find(
433 ASSERT(!(range_length < (1 << btp->bt_sshift))); 432 ASSERT(!(range_length < (1 << btp->bt_sshift)));
434 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); 433 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
435 434
436 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; 435 /* get tree root */
437 436 pag = xfs_perag_get(btp->bt_mount,
438 spin_lock(&hash->bh_lock); 437 xfs_daddr_to_agno(btp->bt_mount, ioff));
439 438
440 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { 439 /* walk tree */
441 ASSERT(btp == bp->b_target); 440 spin_lock(&pag->pag_buf_lock);
442 if (bp->b_file_offset == range_base && 441 rbp = &pag->pag_buf_tree.rb_node;
443 bp->b_buffer_length == range_length) { 442 parent = NULL;
443 bp = NULL;
444 while (*rbp) {
445 parent = *rbp;
446 bp = rb_entry(parent, struct xfs_buf, b_rbnode);
447
448 if (range_base < bp->b_file_offset)
449 rbp = &(*rbp)->rb_left;
450 else if (range_base > bp->b_file_offset)
451 rbp = &(*rbp)->rb_right;
452 else {
444 /* 453 /*
445 * If we look at something, bring it to the 454 * found a block offset match. If the range doesn't
446 * front of the list for next time. 455 * match, the only way this is allowed is if the buffer
456 * in the cache is stale and the transaction that made
457 * it stale has not yet committed. i.e. we are
458 * reallocating a busy extent. Skip this buffer and
459 * continue searching to the right for an exact match.
447 */ 460 */
461 if (bp->b_buffer_length != range_length) {
462 ASSERT(bp->b_flags & XBF_STALE);
463 rbp = &(*rbp)->rb_right;
464 continue;
465 }
448 atomic_inc(&bp->b_hold); 466 atomic_inc(&bp->b_hold);
449 list_move(&bp->b_hash_list, &hash->bh_list);
450 goto found; 467 goto found;
451 } 468 }
452 } 469 }
@@ -455,17 +472,21 @@ _xfs_buf_find(
455 if (new_bp) { 472 if (new_bp) {
456 _xfs_buf_initialize(new_bp, btp, range_base, 473 _xfs_buf_initialize(new_bp, btp, range_base,
457 range_length, flags); 474 range_length, flags);
458 new_bp->b_hash = hash; 475 rb_link_node(&new_bp->b_rbnode, parent, rbp);
459 list_add(&new_bp->b_hash_list, &hash->bh_list); 476 rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
477 /* the buffer keeps the perag reference until it is freed */
478 new_bp->b_pag = pag;
479 spin_unlock(&pag->pag_buf_lock);
460 } else { 480 } else {
461 XFS_STATS_INC(xb_miss_locked); 481 XFS_STATS_INC(xb_miss_locked);
482 spin_unlock(&pag->pag_buf_lock);
483 xfs_perag_put(pag);
462 } 484 }
463
464 spin_unlock(&hash->bh_lock);
465 return new_bp; 485 return new_bp;
466 486
467found: 487found:
468 spin_unlock(&hash->bh_lock); 488 spin_unlock(&pag->pag_buf_lock);
489 xfs_perag_put(pag);
469 490
470 /* Attempt to get the semaphore without sleeping, 491 /* Attempt to get the semaphore without sleeping,
471 * if this does not work then we need to drop the 492 * if this does not work then we need to drop the
@@ -579,9 +600,9 @@ _xfs_buf_read(
579 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 600 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
580 601
581 status = xfs_buf_iorequest(bp); 602 status = xfs_buf_iorequest(bp);
582 if (!status && !(flags & XBF_ASYNC)) 603 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
583 status = xfs_buf_iowait(bp); 604 return status;
584 return status; 605 return xfs_buf_iowait(bp);
585} 606}
586 607
587xfs_buf_t * 608xfs_buf_t *
@@ -631,8 +652,7 @@ void
631xfs_buf_readahead( 652xfs_buf_readahead(
632 xfs_buftarg_t *target, 653 xfs_buftarg_t *target,
633 xfs_off_t ioff, 654 xfs_off_t ioff,
634 size_t isize, 655 size_t isize)
635 xfs_buf_flags_t flags)
636{ 656{
637 struct backing_dev_info *bdi; 657 struct backing_dev_info *bdi;
638 658
@@ -640,8 +660,42 @@ xfs_buf_readahead(
640 if (bdi_read_congested(bdi)) 660 if (bdi_read_congested(bdi))
641 return; 661 return;
642 662
643 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 663 xfs_buf_read(target, ioff, isize,
644 xfs_buf_read(target, ioff, isize, flags); 664 XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
665}
666
667/*
668 * Read an uncached buffer from disk. Allocates and returns a locked
669 * buffer containing the disk contents or nothing.
670 */
671struct xfs_buf *
672xfs_buf_read_uncached(
673 struct xfs_mount *mp,
674 struct xfs_buftarg *target,
675 xfs_daddr_t daddr,
676 size_t length,
677 int flags)
678{
679 xfs_buf_t *bp;
680 int error;
681
682 bp = xfs_buf_get_uncached(target, length, flags);
683 if (!bp)
684 return NULL;
685
686 /* set up the buffer for a read IO */
687 xfs_buf_lock(bp);
688 XFS_BUF_SET_ADDR(bp, daddr);
689 XFS_BUF_READ(bp);
690 XFS_BUF_BUSY(bp);
691
692 xfsbdstrat(mp, bp);
693 error = xfs_buf_iowait(bp);
694 if (error || bp->b_error) {
695 xfs_buf_relse(bp);
696 return NULL;
697 }
698 return bp;
645} 699}
646 700
647xfs_buf_t * 701xfs_buf_t *
@@ -713,9 +767,10 @@ xfs_buf_associate_memory(
713} 767}
714 768
715xfs_buf_t * 769xfs_buf_t *
716xfs_buf_get_noaddr( 770xfs_buf_get_uncached(
771 struct xfs_buftarg *target,
717 size_t len, 772 size_t len,
718 xfs_buftarg_t *target) 773 int flags)
719{ 774{
720 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; 775 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
721 int error, i; 776 int error, i;
@@ -731,7 +786,7 @@ xfs_buf_get_noaddr(
731 goto fail_free_buf; 786 goto fail_free_buf;
732 787
733 for (i = 0; i < page_count; i++) { 788 for (i = 0; i < page_count; i++) {
734 bp->b_pages[i] = alloc_page(GFP_KERNEL); 789 bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
735 if (!bp->b_pages[i]) 790 if (!bp->b_pages[i])
736 goto fail_free_mem; 791 goto fail_free_mem;
737 } 792 }
@@ -746,7 +801,7 @@ xfs_buf_get_noaddr(
746 801
747 xfs_buf_unlock(bp); 802 xfs_buf_unlock(bp);
748 803
749 trace_xfs_buf_get_noaddr(bp, _RET_IP_); 804 trace_xfs_buf_get_uncached(bp, _RET_IP_);
750 return bp; 805 return bp;
751 806
752 fail_free_mem: 807 fail_free_mem:
@@ -780,29 +835,30 @@ void
780xfs_buf_rele( 835xfs_buf_rele(
781 xfs_buf_t *bp) 836 xfs_buf_t *bp)
782{ 837{
783 xfs_bufhash_t *hash = bp->b_hash; 838 struct xfs_perag *pag = bp->b_pag;
784 839
785 trace_xfs_buf_rele(bp, _RET_IP_); 840 trace_xfs_buf_rele(bp, _RET_IP_);
786 841
787 if (unlikely(!hash)) { 842 if (!pag) {
788 ASSERT(!bp->b_relse); 843 ASSERT(!bp->b_relse);
844 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
789 if (atomic_dec_and_test(&bp->b_hold)) 845 if (atomic_dec_and_test(&bp->b_hold))
790 xfs_buf_free(bp); 846 xfs_buf_free(bp);
791 return; 847 return;
792 } 848 }
793 849
850 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
794 ASSERT(atomic_read(&bp->b_hold) > 0); 851 ASSERT(atomic_read(&bp->b_hold) > 0);
795 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { 852 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
796 if (bp->b_relse) { 853 if (bp->b_relse) {
797 atomic_inc(&bp->b_hold); 854 atomic_inc(&bp->b_hold);
798 spin_unlock(&hash->bh_lock); 855 spin_unlock(&pag->pag_buf_lock);
799 (*(bp->b_relse)) (bp); 856 bp->b_relse(bp);
800 } else if (bp->b_flags & XBF_FS_MANAGED) {
801 spin_unlock(&hash->bh_lock);
802 } else { 857 } else {
803 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); 858 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
804 list_del_init(&bp->b_hash_list); 859 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
805 spin_unlock(&hash->bh_lock); 860 spin_unlock(&pag->pag_buf_lock);
861 xfs_perag_put(pag);
806 xfs_buf_free(bp); 862 xfs_buf_free(bp);
807 } 863 }
808 } 864 }
@@ -865,7 +921,7 @@ xfs_buf_lock(
865 trace_xfs_buf_lock(bp, _RET_IP_); 921 trace_xfs_buf_lock(bp, _RET_IP_);
866 922
867 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 923 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
868 xfs_log_force(bp->b_mount, 0); 924 xfs_log_force(bp->b_target->bt_mount, 0);
869 if (atomic_read(&bp->b_io_remaining)) 925 if (atomic_read(&bp->b_io_remaining))
870 blk_run_address_space(bp->b_target->bt_mapping); 926 blk_run_address_space(bp->b_target->bt_mapping);
871 down(&bp->b_sema); 927 down(&bp->b_sema);
@@ -897,36 +953,6 @@ xfs_buf_unlock(
897 trace_xfs_buf_unlock(bp, _RET_IP_); 953 trace_xfs_buf_unlock(bp, _RET_IP_);
898} 954}
899 955
900
901/*
902 * Pinning Buffer Storage in Memory
903 * Ensure that no attempt to force a buffer to disk will succeed.
904 */
905void
906xfs_buf_pin(
907 xfs_buf_t *bp)
908{
909 trace_xfs_buf_pin(bp, _RET_IP_);
910 atomic_inc(&bp->b_pin_count);
911}
912
913void
914xfs_buf_unpin(
915 xfs_buf_t *bp)
916{
917 trace_xfs_buf_unpin(bp, _RET_IP_);
918
919 if (atomic_dec_and_test(&bp->b_pin_count))
920 wake_up_all(&bp->b_waiters);
921}
922
923int
924xfs_buf_ispin(
925 xfs_buf_t *bp)
926{
927 return atomic_read(&bp->b_pin_count);
928}
929
930STATIC void 956STATIC void
931xfs_buf_wait_unpin( 957xfs_buf_wait_unpin(
932 xfs_buf_t *bp) 958 xfs_buf_t *bp)
@@ -960,19 +986,7 @@ xfs_buf_iodone_work(
960 xfs_buf_t *bp = 986 xfs_buf_t *bp =
961 container_of(work, xfs_buf_t, b_iodone_work); 987 container_of(work, xfs_buf_t, b_iodone_work);
962 988
963 /* 989 if (bp->b_iodone)
964 * We can get an EOPNOTSUPP to ordered writes. Here we clear the
965 * ordered flag and reissue them. Because we can't tell the higher
966 * layers directly that they should not issue ordered I/O anymore, they
967 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
968 */
969 if ((bp->b_error == EOPNOTSUPP) &&
970 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
971 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
972 bp->b_flags &= ~XBF_ORDERED;
973 bp->b_flags |= _XFS_BARRIER_FAILED;
974 xfs_buf_iorequest(bp);
975 } else if (bp->b_iodone)
976 (*(bp->b_iodone))(bp); 990 (*(bp->b_iodone))(bp);
977 else if (bp->b_flags & XBF_ASYNC) 991 else if (bp->b_flags & XBF_ASYNC)
978 xfs_buf_relse(bp); 992 xfs_buf_relse(bp);
@@ -1018,13 +1032,11 @@ xfs_bwrite(
1018{ 1032{
1019 int error; 1033 int error;
1020 1034
1021 bp->b_strat = xfs_bdstrat_cb;
1022 bp->b_mount = mp;
1023 bp->b_flags |= XBF_WRITE; 1035 bp->b_flags |= XBF_WRITE;
1024 bp->b_flags &= ~(XBF_ASYNC | XBF_READ); 1036 bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
1025 1037
1026 xfs_buf_delwri_dequeue(bp); 1038 xfs_buf_delwri_dequeue(bp);
1027 xfs_buf_iostrategy(bp); 1039 xfs_bdstrat_cb(bp);
1028 1040
1029 error = xfs_buf_iowait(bp); 1041 error = xfs_buf_iowait(bp);
1030 if (error) 1042 if (error)
@@ -1040,9 +1052,6 @@ xfs_bdwrite(
1040{ 1052{
1041 trace_xfs_buf_bdwrite(bp, _RET_IP_); 1053 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1042 1054
1043 bp->b_strat = xfs_bdstrat_cb;
1044 bp->b_mount = mp;
1045
1046 bp->b_flags &= ~XBF_READ; 1055 bp->b_flags &= ~XBF_READ;
1047 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); 1056 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
1048 1057
@@ -1051,7 +1060,7 @@ xfs_bdwrite(
1051 1060
1052/* 1061/*
1053 * Called when we want to stop a buffer from getting written or read. 1062 * Called when we want to stop a buffer from getting written or read.
1054 * We attach the EIO error, muck with its flags, and call biodone 1063 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
1055 * so that the proper iodone callbacks get called. 1064 * so that the proper iodone callbacks get called.
1056 */ 1065 */
1057STATIC int 1066STATIC int
@@ -1068,22 +1077,21 @@ xfs_bioerror(
1068 XFS_BUF_ERROR(bp, EIO); 1077 XFS_BUF_ERROR(bp, EIO);
1069 1078
1070 /* 1079 /*
1071 * We're calling biodone, so delete XBF_DONE flag. 1080 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
1072 */ 1081 */
1073 XFS_BUF_UNREAD(bp); 1082 XFS_BUF_UNREAD(bp);
1074 XFS_BUF_UNDELAYWRITE(bp); 1083 XFS_BUF_UNDELAYWRITE(bp);
1075 XFS_BUF_UNDONE(bp); 1084 XFS_BUF_UNDONE(bp);
1076 XFS_BUF_STALE(bp); 1085 XFS_BUF_STALE(bp);
1077 1086
1078 XFS_BUF_CLR_BDSTRAT_FUNC(bp); 1087 xfs_buf_ioend(bp, 0);
1079 xfs_biodone(bp);
1080 1088
1081 return EIO; 1089 return EIO;
1082} 1090}
1083 1091
1084/* 1092/*
1085 * Same as xfs_bioerror, except that we are releasing the buffer 1093 * Same as xfs_bioerror, except that we are releasing the buffer
1086 * here ourselves, and avoiding the biodone call. 1094 * here ourselves, and avoiding the xfs_buf_ioend call.
1087 * This is meant for userdata errors; metadata bufs come with 1095 * This is meant for userdata errors; metadata bufs come with
1088 * iodone functions attached, so that we can track down errors. 1096 * iodone functions attached, so that we can track down errors.
1089 */ 1097 */
@@ -1105,7 +1113,6 @@ xfs_bioerror_relse(
1105 XFS_BUF_DONE(bp); 1113 XFS_BUF_DONE(bp);
1106 XFS_BUF_STALE(bp); 1114 XFS_BUF_STALE(bp);
1107 XFS_BUF_CLR_IODONE_FUNC(bp); 1115 XFS_BUF_CLR_IODONE_FUNC(bp);
1108 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1109 if (!(fl & XBF_ASYNC)) { 1116 if (!(fl & XBF_ASYNC)) {
1110 /* 1117 /*
1111 * Mark b_error and B_ERROR _both_. 1118 * Mark b_error and B_ERROR _both_.
@@ -1133,7 +1140,7 @@ int
1133xfs_bdstrat_cb( 1140xfs_bdstrat_cb(
1134 struct xfs_buf *bp) 1141 struct xfs_buf *bp)
1135{ 1142{
1136 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 1143 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1137 trace_xfs_bdstrat_shut(bp, _RET_IP_); 1144 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1138 /* 1145 /*
1139 * Metadata write that didn't get logged but 1146 * Metadata write that didn't get logged but
@@ -1235,7 +1242,7 @@ _xfs_buf_ioapply(
1235 1242
1236 if (bp->b_flags & XBF_ORDERED) { 1243 if (bp->b_flags & XBF_ORDERED) {
1237 ASSERT(!(bp->b_flags & XBF_READ)); 1244 ASSERT(!(bp->b_flags & XBF_READ));
1238 rw = WRITE_BARRIER; 1245 rw = WRITE_FLUSH_FUA;
1239 } else if (bp->b_flags & XBF_LOG_BUFFER) { 1246 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1240 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1247 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1241 bp->b_flags &= ~_XBF_RUN_QUEUES; 1248 bp->b_flags &= ~_XBF_RUN_QUEUES;
@@ -1311,8 +1318,19 @@ submit_io:
1311 if (size) 1318 if (size)
1312 goto next_chunk; 1319 goto next_chunk;
1313 } else { 1320 } else {
1314 bio_put(bio); 1321 /*
1322 * if we get here, no pages were added to the bio. However,
1323 * we can't just error out here - if the pages are locked then
1324 * we have to unlock them otherwise we can hang on a later
1325 * access to the page.
1326 */
1315 xfs_buf_ioerror(bp, EIO); 1327 xfs_buf_ioerror(bp, EIO);
1328 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1329 int i;
1330 for (i = 0; i < bp->b_page_count; i++)
1331 unlock_page(bp->b_pages[i]);
1332 }
1333 bio_put(bio);
1316 } 1334 }
1317} 1335}
1318 1336
@@ -1428,63 +1446,24 @@ xfs_buf_iomove(
1428 */ 1446 */
1429void 1447void
1430xfs_wait_buftarg( 1448xfs_wait_buftarg(
1431 xfs_buftarg_t *btp) 1449 struct xfs_buftarg *btp)
1432{
1433 xfs_buf_t *bp, *n;
1434 xfs_bufhash_t *hash;
1435 uint i;
1436
1437 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1438 hash = &btp->bt_hash[i];
1439again:
1440 spin_lock(&hash->bh_lock);
1441 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
1442 ASSERT(btp == bp->b_target);
1443 if (!(bp->b_flags & XBF_FS_MANAGED)) {
1444 spin_unlock(&hash->bh_lock);
1445 /*
1446 * Catch superblock reference count leaks
1447 * immediately
1448 */
1449 BUG_ON(bp->b_bn == 0);
1450 delay(100);
1451 goto again;
1452 }
1453 }
1454 spin_unlock(&hash->bh_lock);
1455 }
1456}
1457
1458/*
1459 * Allocate buffer hash table for a given target.
1460 * For devices containing metadata (i.e. not the log/realtime devices)
1461 * we need to allocate a much larger hash table.
1462 */
1463STATIC void
1464xfs_alloc_bufhash(
1465 xfs_buftarg_t *btp,
1466 int external)
1467{ 1450{
1468 unsigned int i; 1451 struct xfs_perag *pag;
1452 uint i;
1469 1453
1470 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1454 for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
1471 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1455 pag = xfs_perag_get(btp->bt_mount, i);
1472 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * 1456 spin_lock(&pag->pag_buf_lock);
1473 sizeof(xfs_bufhash_t)); 1457 while (rb_first(&pag->pag_buf_tree)) {
1474 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1458 spin_unlock(&pag->pag_buf_lock);
1475 spin_lock_init(&btp->bt_hash[i].bh_lock); 1459 delay(100);
1476 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1460 spin_lock(&pag->pag_buf_lock);
1461 }
1462 spin_unlock(&pag->pag_buf_lock);
1463 xfs_perag_put(pag);
1477 } 1464 }
1478} 1465}
1479 1466
1480STATIC void
1481xfs_free_bufhash(
1482 xfs_buftarg_t *btp)
1483{
1484 kmem_free_large(btp->bt_hash);
1485 btp->bt_hash = NULL;
1486}
1487
1488/* 1467/*
1489 * buftarg list for delwrite queue processing 1468 * buftarg list for delwrite queue processing
1490 */ 1469 */
@@ -1517,7 +1496,6 @@ xfs_free_buftarg(
1517 xfs_flush_buftarg(btp, 1); 1496 xfs_flush_buftarg(btp, 1);
1518 if (mp->m_flags & XFS_MOUNT_BARRIER) 1497 if (mp->m_flags & XFS_MOUNT_BARRIER)
1519 xfs_blkdev_issue_flush(btp); 1498 xfs_blkdev_issue_flush(btp);
1520 xfs_free_bufhash(btp);
1521 iput(btp->bt_mapping->host); 1499 iput(btp->bt_mapping->host);
1522 1500
1523 /* Unregister the buftarg first so that we don't get a 1501 /* Unregister the buftarg first so that we don't get a
@@ -1602,6 +1580,7 @@ xfs_mapping_buftarg(
1602 XFS_BUFTARG_NAME(btp)); 1580 XFS_BUFTARG_NAME(btp));
1603 return ENOMEM; 1581 return ENOMEM;
1604 } 1582 }
1583 inode->i_ino = get_next_ino();
1605 inode->i_mode = S_IFBLK; 1584 inode->i_mode = S_IFBLK;
1606 inode->i_bdev = bdev; 1585 inode->i_bdev = bdev;
1607 inode->i_rdev = bdev->bd_dev; 1586 inode->i_rdev = bdev->bd_dev;
@@ -1639,6 +1618,7 @@ out_error:
1639 1618
1640xfs_buftarg_t * 1619xfs_buftarg_t *
1641xfs_alloc_buftarg( 1620xfs_alloc_buftarg(
1621 struct xfs_mount *mp,
1642 struct block_device *bdev, 1622 struct block_device *bdev,
1643 int external, 1623 int external,
1644 const char *fsname) 1624 const char *fsname)
@@ -1647,6 +1627,7 @@ xfs_alloc_buftarg(
1647 1627
1648 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1628 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
1649 1629
1630 btp->bt_mount = mp;
1650 btp->bt_dev = bdev->bd_dev; 1631 btp->bt_dev = bdev->bd_dev;
1651 btp->bt_bdev = bdev; 1632 btp->bt_bdev = bdev;
1652 if (xfs_setsize_buftarg_early(btp, bdev)) 1633 if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1655,7 +1636,6 @@ xfs_alloc_buftarg(
1655 goto error; 1636 goto error;
1656 if (xfs_alloc_delwrite_queue(btp, fsname)) 1637 if (xfs_alloc_delwrite_queue(btp, fsname))
1657 goto error; 1638 goto error;
1658 xfs_alloc_bufhash(btp, external);
1659 return btp; 1639 return btp;
1660 1640
1661error: 1641error:
@@ -1804,7 +1784,7 @@ xfs_buf_delwri_split(
1804 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1784 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1805 ASSERT(bp->b_flags & XBF_DELWRI); 1785 ASSERT(bp->b_flags & XBF_DELWRI);
1806 1786
1807 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1787 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
1808 if (!force && 1788 if (!force &&
1809 time_before(jiffies, bp->b_queuetime + age)) { 1789 time_before(jiffies, bp->b_queuetime + age)) {
1810 xfs_buf_unlock(bp); 1790 xfs_buf_unlock(bp);
@@ -1889,7 +1869,7 @@ xfsbufd(
1889 struct xfs_buf *bp; 1869 struct xfs_buf *bp;
1890 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1870 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1891 list_del_init(&bp->b_list); 1871 list_del_init(&bp->b_list);
1892 xfs_buf_iostrategy(bp); 1872 xfs_bdstrat_cb(bp);
1893 count++; 1873 count++;
1894 } 1874 }
1895 if (count) 1875 if (count)
@@ -1936,7 +1916,7 @@ xfs_flush_buftarg(
1936 bp->b_flags &= ~XBF_ASYNC; 1916 bp->b_flags &= ~XBF_ASYNC;
1937 list_add(&bp->b_list, &wait_list); 1917 list_add(&bp->b_list, &wait_list);
1938 } 1918 }
1939 xfs_buf_iostrategy(bp); 1919 xfs_bdstrat_cb(bp);
1940 } 1920 }
1941 1921
1942 if (wait) { 1922 if (wait) {
@@ -1946,7 +1926,7 @@ xfs_flush_buftarg(
1946 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1926 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1947 1927
1948 list_del_init(&bp->b_list); 1928 list_del_init(&bp->b_list);
1949 xfs_iowait(bp); 1929 xfs_buf_iowait(bp);
1950 xfs_buf_relse(bp); 1930 xfs_buf_relse(bp);
1951 } 1931 }
1952 } 1932 }
@@ -1962,7 +1942,8 @@ xfs_buf_init(void)
1962 if (!xfs_buf_zone) 1942 if (!xfs_buf_zone)
1963 goto out; 1943 goto out;
1964 1944
1965 xfslogd_workqueue = create_workqueue("xfslogd"); 1945 xfslogd_workqueue = alloc_workqueue("xfslogd",
1946 WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
1966 if (!xfslogd_workqueue) 1947 if (!xfslogd_workqueue)
1967 goto out_free_buf_zone; 1948 goto out_free_buf_zone;
1968 1949
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5fbecefa5dfd..383a3f37cf98 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -44,57 +44,48 @@ typedef enum {
44 XBRW_ZERO = 3, /* Zero target memory */ 44 XBRW_ZERO = 3, /* Zero target memory */
45} xfs_buf_rw_t; 45} xfs_buf_rw_t;
46 46
47typedef enum { 47#define XBF_READ (1 << 0) /* buffer intended for reading from device */
48 XBF_READ = (1 << 0), /* buffer intended for reading from device */ 48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
49 XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ 49#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
50 XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ 50#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
51 XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ 51#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
52 XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ 52#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
53 XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 53#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
54 XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 54#define XBF_ORDERED (1 << 11)/* use ordered writes */
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ 58/* flags used only as arguments to access routines */
59 59#define XBF_LOCK (1 << 14)/* lock requested */
60 /* flags used only as arguments to access routines */ 60#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
61 XBF_LOCK = (1 << 14), /* lock requested */ 61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
62 XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ 62
63 XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ 63/* flags used only internally */
64 64#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
65 /* flags used only internally */ 65#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
66 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 66#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
67 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ 67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
69 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
70 68
71 /* 69/*
72 * Special flag for supporting metadata blocks smaller than a FSB. 70 * Special flag for supporting metadata blocks smaller than a FSB.
73 * 71 *
74 * In this case we can have multiple xfs_buf_t on a single page and 72 * In this case we can have multiple xfs_buf_t on a single page and
75 * need to lock out concurrent xfs_buf_t readers as they only 73 * need to lock out concurrent xfs_buf_t readers as they only
76 * serialise access to the buffer. 74 * serialise access to the buffer.
77 * 75 *
78 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation 76 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
79 * between reads of the page. Hence we can have one thread read the 77 * between reads of the page. Hence we can have one thread read the
80 * page and modify it, but then race with another thread that thinks 78 * page and modify it, but then race with another thread that thinks
81 * the page is not up-to-date and hence reads it again. 79 * the page is not up-to-date and hence reads it again.
82 * 80 *
83 * The result is that the first modifcation to the page is lost. 81 * The result is that the first modifcation to the page is lost.
84 * This sort of AGF/AGI reading race can happen when unlinking inodes 82 * This sort of AGF/AGI reading race can happen when unlinking inodes
85 * that require truncation and results in the AGI unlinked list 83 * that require truncation and results in the AGI unlinked list
86 * modifications being lost. 84 * modifications being lost.
87 */ 85 */
88 _XBF_PAGE_LOCKED = (1 << 22), 86#define _XBF_PAGE_LOCKED (1 << 22)
89 87
90 /* 88typedef unsigned int xfs_buf_flags_t;
91 * If we try a barrier write, but it fails we have to communicate
92 * this to the upper layers. Unfortunately b_error gets overwritten
93 * when the buffer is re-issued so we have to add another flag to
94 * keep this information.
95 */
96 _XFS_BARRIER_FAILED = (1 << 23),
97} xfs_buf_flags_t;
98 89
99#define XFS_BUF_FLAGS \ 90#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \ 91 { XBF_READ, "READ" }, \
@@ -104,7 +95,6 @@ typedef enum {
104 { XBF_DONE, "DONE" }, \ 95 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \ 96 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \ 97 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \ 98 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \ 99 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\ 100 { XBF_LOCK, "LOCK" }, /* should never be set */\
@@ -114,8 +104,7 @@ typedef enum {
114 { _XBF_PAGES, "PAGES" }, \ 104 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 105 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 106 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ 107 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119 108
120 109
121typedef enum { 110typedef enum {
@@ -132,15 +121,11 @@ typedef struct xfs_buftarg {
132 dev_t bt_dev; 121 dev_t bt_dev;
133 struct block_device *bt_bdev; 122 struct block_device *bt_bdev;
134 struct address_space *bt_mapping; 123 struct address_space *bt_mapping;
124 struct xfs_mount *bt_mount;
135 unsigned int bt_bsize; 125 unsigned int bt_bsize;
136 unsigned int bt_sshift; 126 unsigned int bt_sshift;
137 size_t bt_smask; 127 size_t bt_smask;
138 128
139 /* per device buffer hash table */
140 uint bt_hashmask;
141 uint bt_hashshift;
142 xfs_bufhash_t *bt_hash;
143
144 /* per device delwri queue */ 129 /* per device delwri queue */
145 struct task_struct *bt_task; 130 struct task_struct *bt_task;
146 struct list_head bt_list; 131 struct list_head bt_list;
@@ -168,35 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
168#define XB_PAGES 2 153#define XB_PAGES 2
169 154
170typedef struct xfs_buf { 155typedef struct xfs_buf {
156 /*
157 * first cacheline holds all the fields needed for an uncontended cache
158 * hit to be fully processed. The semaphore straddles the cacheline
159 * boundary, but the counter and lock sits on the first cacheline,
160 * which is the only bit that is touched if we hit the semaphore
161 * fast-path on locking.
162 */
163 struct rb_node b_rbnode; /* rbtree node */
164 xfs_off_t b_file_offset; /* offset in file */
165 size_t b_buffer_length;/* size of buffer in bytes */
166 atomic_t b_hold; /* reference count */
167 xfs_buf_flags_t b_flags; /* status flags */
171 struct semaphore b_sema; /* semaphore for lockables */ 168 struct semaphore b_sema; /* semaphore for lockables */
172 unsigned long b_queuetime; /* time buffer was queued */ 169
173 atomic_t b_pin_count; /* pin count */
174 wait_queue_head_t b_waiters; /* unpin waiters */ 170 wait_queue_head_t b_waiters; /* unpin waiters */
175 struct list_head b_list; 171 struct list_head b_list;
176 xfs_buf_flags_t b_flags; /* status flags */ 172 struct xfs_perag *b_pag; /* contains rbtree root */
177 struct list_head b_hash_list; /* hash table list */
178 xfs_bufhash_t *b_hash; /* hash table list start */
179 xfs_buftarg_t *b_target; /* buffer target (device) */ 173 xfs_buftarg_t *b_target; /* buffer target (device) */
180 atomic_t b_hold; /* reference count */
181 xfs_daddr_t b_bn; /* block number for I/O */ 174 xfs_daddr_t b_bn; /* block number for I/O */
182 xfs_off_t b_file_offset; /* offset in file */
183 size_t b_buffer_length;/* size of buffer in bytes */
184 size_t b_count_desired;/* desired transfer size */ 175 size_t b_count_desired;/* desired transfer size */
185 void *b_addr; /* virtual address of buffer */ 176 void *b_addr; /* virtual address of buffer */
186 struct work_struct b_iodone_work; 177 struct work_struct b_iodone_work;
187 atomic_t b_io_remaining; /* #outstanding I/O requests */
188 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 178 xfs_buf_iodone_t b_iodone; /* I/O completion function */
189 xfs_buf_relse_t b_relse; /* releasing function */ 179 xfs_buf_relse_t b_relse; /* releasing function */
190 xfs_buf_bdstrat_t b_strat; /* pre-write function */
191 struct completion b_iowait; /* queue for I/O waiters */ 180 struct completion b_iowait; /* queue for I/O waiters */
192 void *b_fspriv; 181 void *b_fspriv;
193 void *b_fspriv2; 182 void *b_fspriv2;
194 struct xfs_mount *b_mount;
195 unsigned short b_error; /* error code on I/O */
196 unsigned int b_page_count; /* size of page array */
197 unsigned int b_offset; /* page offset in first page */
198 struct page **b_pages; /* array of page pointers */ 183 struct page **b_pages; /* array of page pointers */
199 struct page *b_page_array[XB_PAGES]; /* inline pages */ 184 struct page *b_page_array[XB_PAGES]; /* inline pages */
185 unsigned long b_queuetime; /* time buffer was queued */
186 atomic_t b_pin_count; /* pin count */
187 atomic_t b_io_remaining; /* #outstanding I/O requests */
188 unsigned int b_page_count; /* size of page array */
189 unsigned int b_offset; /* page offset in first page */
190 unsigned short b_error; /* error code on I/O */
200#ifdef XFS_BUF_LOCK_TRACKING 191#ifdef XFS_BUF_LOCK_TRACKING
201 int b_last_holder; 192 int b_last_holder;
202#endif 193#endif
@@ -215,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
215 xfs_buf_flags_t); 206 xfs_buf_flags_t);
216 207
217extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 208extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
218extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 209extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
219extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 210extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
220extern void xfs_buf_hold(xfs_buf_t *); 211extern void xfs_buf_hold(xfs_buf_t *);
221extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, 212extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
222 xfs_buf_flags_t); 213struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
214 struct xfs_buftarg *target,
215 xfs_daddr_t daddr, size_t length, int flags);
223 216
224/* Releasing Buffers */ 217/* Releasing Buffers */
225extern void xfs_buf_free(xfs_buf_t *); 218extern void xfs_buf_free(xfs_buf_t *);
@@ -244,11 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
244extern int xfs_buf_iowait(xfs_buf_t *); 237extern int xfs_buf_iowait(xfs_buf_t *);
245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 238extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
246 xfs_buf_rw_t); 239 xfs_buf_rw_t);
247 240#define xfs_buf_zero(bp, off, len) \
248static inline int xfs_buf_iostrategy(xfs_buf_t *bp) 241 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
249{
250 return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
251}
252 242
253static inline int xfs_buf_geterror(xfs_buf_t *bp) 243static inline int xfs_buf_geterror(xfs_buf_t *bp)
254{ 244{
@@ -258,11 +248,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
258/* Buffer Utility Routines */ 248/* Buffer Utility Routines */
259extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); 249extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
260 250
261/* Pinning Buffer Storage in Memory */
262extern void xfs_buf_pin(xfs_buf_t *);
263extern void xfs_buf_unpin(xfs_buf_t *);
264extern int xfs_buf_ispin(xfs_buf_t *);
265
266/* Delayed Write Buffer Routines */ 251/* Delayed Write Buffer Routines */
267extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 252extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *); 253extern void xfs_buf_delwri_promote(xfs_buf_t *);
@@ -288,8 +273,6 @@ extern void xfs_buf_terminate(void);
288 XFS_BUF_DONE(bp); \ 273 XFS_BUF_DONE(bp); \
289 } while (0) 274 } while (0)
290 275
291#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
292
293#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) 276#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
294#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) 277#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
295#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) 278#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
@@ -326,8 +309,6 @@ extern void xfs_buf_terminate(void);
326#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) 309#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
327#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) 310#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
328#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) 311#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
329#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
330#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
331 312
332#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) 313#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
333#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) 314#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
@@ -351,7 +332,7 @@ extern void xfs_buf_terminate(void);
351#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 332#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
352#define XFS_BUF_SET_REF(bp, ref) do { } while (0) 333#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
353 334
354#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) 335#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
355 336
356#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) 337#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
357#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) 338#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
@@ -370,27 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 xfs_buf_rele(bp); 351 xfs_buf_rele(bp);
371} 352}
372 353
373#define xfs_bpin(bp) xfs_buf_pin(bp)
374#define xfs_bunpin(bp) xfs_buf_unpin(bp)
375#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
376
377#define xfs_biomove(bp, off, len, data, rw) \
378 xfs_buf_iomove((bp), (off), (len), (data), \
379 ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
380
381#define xfs_biozero(bp, off, len) \
382 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
383
384#define xfs_iowait(bp) xfs_buf_iowait(bp)
385
386#define xfs_baread(target, rablkno, ralen) \
387 xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
388
389
390/* 354/*
391 * Handling of buftargs. 355 * Handling of buftargs.
392 */ 356 */
393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); 357extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
358 struct block_device *, int, const char *);
394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); 359extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
395extern void xfs_wait_buftarg(xfs_buftarg_t *); 360extern void xfs_wait_buftarg(xfs_buftarg_t *);
396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 361extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index 55bddf3b6091..000000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CRED_H__
19#define __XFS_CRED_H__
20
21#include <linux/capability.h>
22
23/*
24 * Credentials
25 */
26typedef const struct cred cred_t;
27
28#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
deleted file mode 100644
index a8b0b1685eed..000000000000
--- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_PRIV_H__
19#define __XFS_DMAPI_PRIV_H__
20
21/*
22 * Based on IO_ISDIRECT, decide which i_ flag is set.
23 */
24#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
25 DM_FLAGS_IMUX : 0)
26#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
27
28#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index e7839ee49e43..3764d74790ec 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -23,13 +23,13 @@
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h" 25#include "xfs_dir2.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_export.h" 27#include "xfs_export.h"
29#include "xfs_vnodeops.h" 28#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 30#include "xfs_inode.h"
32#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_trace.h"
33 33
34/* 34/*
35 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
@@ -132,8 +132,7 @@ xfs_nfs_get_inode(
132 * fine and not an indication of a corrupted filesystem as clients can 132 * fine and not an indication of a corrupted filesystem as clients can
133 * send invalid file handles and we have to handle it gracefully.. 133 * send invalid file handles and we have to handle it gracefully..
134 */ 134 */
135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
136 XFS_ILOCK_SHARED, &ip);
137 if (error) { 136 if (error) {
138 /* 137 /*
139 * EINVAL means the inode cluster doesn't exist anymore. 138 * EINVAL means the inode cluster doesn't exist anymore.
@@ -148,11 +147,10 @@ xfs_nfs_get_inode(
148 } 147 }
149 148
150 if (ip->i_d.di_gen != generation) { 149 if (ip->i_d.di_gen != generation) {
151 xfs_iput_new(ip, XFS_ILOCK_SHARED); 150 IRELE(ip);
152 return ERR_PTR(-ENOENT); 151 return ERR_PTR(-ENOENT);
153 } 152 }
154 153
155 xfs_iunlock(ip, XFS_ILOCK_SHARED);
156 return VFS_I(ip); 154 return VFS_I(ip);
157} 155}
158 156
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 257a56b127cf..ba8ad422a165 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -22,23 +22,15 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h"
26#include "xfs_trans.h" 25#include "xfs_trans.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_alloc.h" 28#include "xfs_alloc.h"
33#include "xfs_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h" 29#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
39#include "xfs_bmap.h" 32#include "xfs_bmap.h"
40#include "xfs_error.h" 33#include "xfs_error.h"
41#include "xfs_rw.h"
42#include "xfs_vnodeops.h" 34#include "xfs_vnodeops.h"
43#include "xfs_da_btree.h" 35#include "xfs_da_btree.h"
44#include "xfs_ioctl.h" 36#include "xfs_ioctl.h"
@@ -108,7 +100,7 @@ xfs_file_fsync(
108 int error = 0; 100 int error = 0;
109 int log_flushed = 0; 101 int log_flushed = 0;
110 102
111 xfs_itrace_entry(ip); 103 trace_xfs_file_fsync(ip);
112 104
113 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 105 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
114 return -XFS_ERROR(EIO); 106 return -XFS_ERROR(EIO);
@@ -166,8 +158,7 @@ xfs_file_fsync(
166 * transaction. So we play it safe and fire off the 158 * transaction. So we play it safe and fire off the
167 * transaction anyway. 159 * transaction anyway.
168 */ 160 */
169 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 161 xfs_trans_ijoin(tp, ip);
170 xfs_trans_ihold(tp, ip);
171 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 162 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
172 xfs_trans_set_sync(tp); 163 xfs_trans_set_sync(tp);
173 error = _xfs_trans_commit(tp, 0, &log_flushed); 164 error = _xfs_trans_commit(tp, 0, &log_flushed);
@@ -275,20 +266,6 @@ xfs_file_aio_read(
275 mutex_lock(&inode->i_mutex); 266 mutex_lock(&inode->i_mutex);
276 xfs_ilock(ip, XFS_IOLOCK_SHARED); 267 xfs_ilock(ip, XFS_IOLOCK_SHARED);
277 268
278 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
279 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
280 int iolock = XFS_IOLOCK_SHARED;
281
282 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
283 dmflags, &iolock);
284 if (ret) {
285 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
286 if (unlikely(ioflags & IO_ISDIRECT))
287 mutex_unlock(&inode->i_mutex);
288 return ret;
289 }
290 }
291
292 if (unlikely(ioflags & IO_ISDIRECT)) { 269 if (unlikely(ioflags & IO_ISDIRECT)) {
293 if (inode->i_mapping->nrpages) { 270 if (inode->i_mapping->nrpages) {
294 ret = -xfs_flushinval_pages(ip, 271 ret = -xfs_flushinval_pages(ip,
@@ -321,7 +298,6 @@ xfs_file_splice_read(
321 unsigned int flags) 298 unsigned int flags)
322{ 299{
323 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); 300 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
324 struct xfs_mount *mp = ip->i_mount;
325 int ioflags = 0; 301 int ioflags = 0;
326 ssize_t ret; 302 ssize_t ret;
327 303
@@ -335,18 +311,6 @@ xfs_file_splice_read(
335 311
336 xfs_ilock(ip, XFS_IOLOCK_SHARED); 312 xfs_ilock(ip, XFS_IOLOCK_SHARED);
337 313
338 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
339 int iolock = XFS_IOLOCK_SHARED;
340 int error;
341
342 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
343 FILP_DELAY_FLAG(infilp), &iolock);
344 if (error) {
345 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
346 return -error;
347 }
348 }
349
350 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 314 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
351 315
352 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 316 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
@@ -367,7 +331,6 @@ xfs_file_splice_write(
367{ 331{
368 struct inode *inode = outfilp->f_mapping->host; 332 struct inode *inode = outfilp->f_mapping->host;
369 struct xfs_inode *ip = XFS_I(inode); 333 struct xfs_inode *ip = XFS_I(inode);
370 struct xfs_mount *mp = ip->i_mount;
371 xfs_fsize_t isize, new_size; 334 xfs_fsize_t isize, new_size;
372 int ioflags = 0; 335 int ioflags = 0;
373 ssize_t ret; 336 ssize_t ret;
@@ -382,18 +345,6 @@ xfs_file_splice_write(
382 345
383 xfs_ilock(ip, XFS_IOLOCK_EXCL); 346 xfs_ilock(ip, XFS_IOLOCK_EXCL);
384 347
385 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
386 int iolock = XFS_IOLOCK_EXCL;
387 int error;
388
389 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
390 FILP_DELAY_FLAG(outfilp), &iolock);
391 if (error) {
392 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
393 return -error;
394 }
395 }
396
397 new_size = *ppos + count; 348 new_size = *ppos + count;
398 349
399 xfs_ilock(ip, XFS_ILOCK_EXCL); 350 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -463,7 +414,7 @@ xfs_zero_last_block(
463 last_fsb = XFS_B_TO_FSBT(mp, isize); 414 last_fsb = XFS_B_TO_FSBT(mp, isize);
464 nimaps = 1; 415 nimaps = 1;
465 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, 416 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
466 &nimaps, NULL, NULL); 417 &nimaps, NULL);
467 if (error) { 418 if (error) {
468 return error; 419 return error;
469 } 420 }
@@ -558,7 +509,7 @@ xfs_zero_eof(
558 nimaps = 1; 509 nimaps = 1;
559 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 510 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
560 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 511 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
561 0, NULL, 0, &imap, &nimaps, NULL, NULL); 512 0, NULL, 0, &imap, &nimaps, NULL);
562 if (error) { 513 if (error) {
563 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 514 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
564 return error; 515 return error;
@@ -627,7 +578,6 @@ xfs_file_aio_write(
627 int ioflags = 0; 578 int ioflags = 0;
628 xfs_fsize_t isize, new_size; 579 xfs_fsize_t isize, new_size;
629 int iolock; 580 int iolock;
630 int eventsent = 0;
631 size_t ocount = 0, count; 581 size_t ocount = 0, count;
632 int need_i_mutex; 582 int need_i_mutex;
633 583
@@ -673,33 +623,6 @@ start:
673 goto out_unlock_mutex; 623 goto out_unlock_mutex;
674 } 624 }
675 625
676 if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
677 !(ioflags & IO_INVIS) && !eventsent)) {
678 int dmflags = FILP_DELAY_FLAG(file);
679
680 if (need_i_mutex)
681 dmflags |= DM_FLAGS_IMUX;
682
683 xfs_iunlock(ip, XFS_ILOCK_EXCL);
684 error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
685 pos, count, dmflags, &iolock);
686 if (error) {
687 goto out_unlock_internal;
688 }
689 xfs_ilock(ip, XFS_ILOCK_EXCL);
690 eventsent = 1;
691
692 /*
693 * The iolock was dropped and reacquired in XFS_SEND_DATA
694 * so we have to recheck the size when appending.
695 * We will only "goto start;" once, since having sent the
696 * event prevents another call to XFS_SEND_DATA, which is
697 * what allows the size to change in the first place.
698 */
699 if ((file->f_flags & O_APPEND) && pos != ip->i_size)
700 goto start;
701 }
702
703 if (ioflags & IO_ISDIRECT) { 626 if (ioflags & IO_ISDIRECT) {
704 xfs_buftarg_t *target = 627 xfs_buftarg_t *target =
705 XFS_IS_REALTIME_INODE(ip) ? 628 XFS_IS_REALTIME_INODE(ip) ?
@@ -830,22 +753,6 @@ write_retry:
830 xfs_iunlock(ip, XFS_ILOCK_EXCL); 753 xfs_iunlock(ip, XFS_ILOCK_EXCL);
831 } 754 }
832 755
833 if (ret == -ENOSPC &&
834 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
835 xfs_iunlock(ip, iolock);
836 if (need_i_mutex)
837 mutex_unlock(&inode->i_mutex);
838 error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
839 DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
840 0, 0, 0); /* Delay flag intentionally unused */
841 if (need_i_mutex)
842 mutex_lock(&inode->i_mutex);
843 xfs_ilock(ip, iolock);
844 if (error)
845 goto out_unlock_internal;
846 goto start;
847 }
848
849 error = -ret; 756 error = -ret;
850 if (ret <= 0) 757 if (ret <= 0)
851 goto out_unlock_internal; 758 goto out_unlock_internal;
@@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = {
1014 .open = xfs_file_open, 921 .open = xfs_file_open,
1015 .release = xfs_file_release, 922 .release = xfs_file_release,
1016 .fsync = xfs_file_fsync, 923 .fsync = xfs_file_fsync,
1017#ifdef HAVE_FOP_OPEN_EXEC
1018 .open_exec = xfs_file_open_exec,
1019#endif
1020}; 924};
1021 925
1022const struct file_operations xfs_dir_file_operations = { 926const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index b6918d76bc7b..ed88ed16811c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -21,10 +21,6 @@
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h" 22#include "xfs_trace.h"
23 23
24int fs_noerr(void) { return 0; }
25int fs_nosys(void) { return ENOSYS; }
26void fs_noval(void) { return; }
27
28/* 24/*
29 * note: all filemap functions return negative error codes. These 25 * note: all filemap functions return negative error codes. These
30 * need to be inverted before returning to the xfs core functions. 26 * need to be inverted before returning to the xfs core functions.
@@ -36,10 +32,9 @@ xfs_tosspages(
36 xfs_off_t last, 32 xfs_off_t last,
37 int fiopt) 33 int fiopt)
38{ 34{
39 struct address_space *mapping = VFS_I(ip)->i_mapping; 35 /* can't toss partial tail pages, so mask them out */
40 36 last &= ~(PAGE_SIZE - 1);
41 if (mapping->nrpages) 37 truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
42 truncate_inode_pages(mapping, first);
43} 38}
44 39
45int 40int
@@ -54,12 +49,11 @@ xfs_flushinval_pages(
54 49
55 trace_xfs_pagecache_inval(ip, first, last); 50 trace_xfs_pagecache_inval(ip, first, last);
56 51
57 if (mapping->nrpages) { 52 xfs_iflags_clear(ip, XFS_ITRUNCATED);
58 xfs_iflags_clear(ip, XFS_ITRUNCATED); 53 ret = filemap_write_and_wait_range(mapping, first,
59 ret = filemap_write_and_wait(mapping); 54 last == -1 ? LLONG_MAX : last);
60 if (!ret) 55 if (!ret)
61 truncate_inode_pages(mapping, first); 56 truncate_inode_pages_range(mapping, first, last);
62 }
63 return -ret; 57 return -ret;
64} 58}
65 59
@@ -75,10 +69,9 @@ xfs_flush_pages(
75 int ret = 0; 69 int ret = 0;
76 int ret2; 70 int ret2;
77 71
78 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 72 xfs_iflags_clear(ip, XFS_ITRUNCATED);
79 xfs_iflags_clear(ip, XFS_ITRUNCATED); 73 ret = -filemap_fdatawrite_range(mapping, first,
80 ret = -filemap_fdatawrite(mapping); 74 last == -1 ? LLONG_MAX : last);
81 }
82 if (flags & XBF_ASYNC) 75 if (flags & XBF_ASYNC)
83 return ret; 76 return ret;
84 ret2 = xfs_wait_on_pages(ip, first, last); 77 ret2 = xfs_wait_on_pages(ip, first, last);
@@ -95,7 +88,9 @@ xfs_wait_on_pages(
95{ 88{
96 struct address_space *mapping = VFS_I(ip)->i_mapping; 89 struct address_space *mapping = VFS_I(ip)->i_mapping;
97 90
98 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) 91 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
99 return -filemap_fdatawait(mapping); 92 return -filemap_fdatawait_range(mapping, first,
93 last == -1 ? ip->i_size - 1 : last);
94 }
100 return 0; 95 return 0;
101} 96}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index 82bb19b2599e..000000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FS_SUBR_H__
19#define __XFS_FS_SUBR_H__
20
21extern int fs_noerr(void);
22extern int fs_nosys(void);
23extern void fs_noval(void);
24
25#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 2ae8b1ccb02e..76e81cff70b9 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_cred.h"
20#include "xfs_sysctl.h" 19#include "xfs_sysctl.h"
21 20
22/* 21/*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index 69f71caf061c..000000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_GLOBALS_H__
19#define __XFS_GLOBALS_H__
20
21extern uint64_t xfs_panic_mask; /* set to cause more panics */
22
23#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e59a81062830..2ea238f6d38e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,24 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_ioctl.h" 31#include "xfs_ioctl.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_error.h" 34#include "xfs_error.h"
43#include "xfs_rw.h"
44#include "xfs_attr.h" 35#include "xfs_attr.h"
45#include "xfs_bmap.h" 36#include "xfs_bmap.h"
46#include "xfs_buf_item.h" 37#include "xfs_buf_item.h"
@@ -794,10 +785,12 @@ xfs_ioc_fsgetxattr(
794{ 785{
795 struct fsxattr fa; 786 struct fsxattr fa;
796 787
788 memset(&fa, 0, sizeof(struct fsxattr));
789
797 xfs_ilock(ip, XFS_ILOCK_SHARED); 790 xfs_ilock(ip, XFS_ILOCK_SHARED);
798 fa.fsx_xflags = xfs_ip2xflags(ip); 791 fa.fsx_xflags = xfs_ip2xflags(ip);
799 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; 792 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
800 fa.fsx_projid = ip->i_d.di_projid; 793 fa.fsx_projid = xfs_get_projid(ip);
801 794
802 if (attr) { 795 if (attr) {
803 if (ip->i_afp) { 796 if (ip->i_afp) {
@@ -908,7 +901,7 @@ xfs_ioctl_setattr(
908 struct xfs_dquot *olddquot = NULL; 901 struct xfs_dquot *olddquot = NULL;
909 int code; 902 int code;
910 903
911 xfs_itrace_entry(ip); 904 trace_xfs_ioctl_setattr(ip);
912 905
913 if (mp->m_flags & XFS_MOUNT_RDONLY) 906 if (mp->m_flags & XFS_MOUNT_RDONLY)
914 return XFS_ERROR(EROFS); 907 return XFS_ERROR(EROFS);
@@ -916,6 +909,13 @@ xfs_ioctl_setattr(
916 return XFS_ERROR(EIO); 909 return XFS_ERROR(EIO);
917 910
918 /* 911 /*
912 * Disallow 32bit project ids when projid32bit feature is not enabled.
913 */
914 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
915 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
916 return XFS_ERROR(EINVAL);
917
918 /*
919 * If disk quotas is on, we make sure that the dquots do exist on disk, 919 * If disk quotas is on, we make sure that the dquots do exist on disk,
920 * before we start any other transactions. Trying to do this later 920 * before we start any other transactions. Trying to do this later
921 * is messy. We don't care to take a readlock to look at the ids 921 * is messy. We don't care to take a readlock to look at the ids
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
961 if (mask & FSX_PROJID) { 961 if (mask & FSX_PROJID) {
962 if (XFS_IS_QUOTA_RUNNING(mp) && 962 if (XFS_IS_QUOTA_RUNNING(mp) &&
963 XFS_IS_PQUOTA_ON(mp) && 963 XFS_IS_PQUOTA_ON(mp) &&
964 ip->i_d.di_projid != fa->fsx_projid) { 964 xfs_get_projid(ip) != fa->fsx_projid) {
965 ASSERT(tp); 965 ASSERT(tp);
966 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, 966 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
967 capable(CAP_FOWNER) ? 967 capable(CAP_FOWNER) ?
@@ -1043,8 +1043,7 @@ xfs_ioctl_setattr(
1043 } 1043 }
1044 } 1044 }
1045 1045
1046 xfs_trans_ijoin(tp, ip, lock_flags); 1046 xfs_trans_ijoin(tp, ip);
1047 xfs_trans_ihold(tp, ip);
1048 1047
1049 /* 1048 /*
1050 * Change file ownership. Must be the owner or privileged. 1049 * Change file ownership. Must be the owner or privileged.
@@ -1064,12 +1063,12 @@ xfs_ioctl_setattr(
1064 * Change the ownerships and register quota modifications 1063 * Change the ownerships and register quota modifications
1065 * in the transaction. 1064 * in the transaction.
1066 */ 1065 */
1067 if (ip->i_d.di_projid != fa->fsx_projid) { 1066 if (xfs_get_projid(ip) != fa->fsx_projid) {
1068 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { 1067 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
1069 olddquot = xfs_qm_vop_chown(tp, ip, 1068 olddquot = xfs_qm_vop_chown(tp, ip,
1070 &ip->i_gdquot, gdqp); 1069 &ip->i_gdquot, gdqp);
1071 } 1070 }
1072 ip->i_d.di_projid = fa->fsx_projid; 1071 xfs_set_projid(ip, fa->fsx_projid);
1073 1072
1074 /* 1073 /*
1075 * We may have to rev the inode as well as 1074 * We may have to rev the inode as well as
@@ -1089,8 +1088,8 @@ xfs_ioctl_setattr(
1089 xfs_diflags_to_linux(ip); 1088 xfs_diflags_to_linux(ip);
1090 } 1089 }
1091 1090
1091 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1092 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1092 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1093 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1094 1093
1095 XFS_STATS_INC(xs_ig_attrchg); 1094 XFS_STATS_INC(xs_ig_attrchg);
1096 1095
@@ -1116,16 +1115,7 @@ xfs_ioctl_setattr(
1116 xfs_qm_dqrele(udqp); 1115 xfs_qm_dqrele(udqp);
1117 xfs_qm_dqrele(gdqp); 1116 xfs_qm_dqrele(gdqp);
1118 1117
1119 if (code) 1118 return code;
1120 return code;
1121
1122 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1123 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1124 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1125 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1126 }
1127
1128 return 0;
1129 1119
1130 error_return: 1120 error_return:
1131 xfs_qm_dqrele(udqp); 1121 xfs_qm_dqrele(udqp);
@@ -1301,7 +1291,7 @@ xfs_file_ioctl(
1301 if (filp->f_mode & FMODE_NOCMTIME) 1291 if (filp->f_mode & FMODE_NOCMTIME)
1302 ioflags |= IO_INVIS; 1292 ioflags |= IO_INVIS;
1303 1293
1304 xfs_itrace_entry(ip); 1294 trace_xfs_file_ioctl(ip);
1305 1295
1306 switch (cmd) { 1296 switch (cmd) {
1307 case XFS_IOC_ALLOCSP: 1297 case XFS_IOC_ALLOCSP:
@@ -1311,7 +1301,8 @@ xfs_file_ioctl(
1311 case XFS_IOC_ALLOCSP64: 1301 case XFS_IOC_ALLOCSP64:
1312 case XFS_IOC_FREESP64: 1302 case XFS_IOC_FREESP64:
1313 case XFS_IOC_RESVSP64: 1303 case XFS_IOC_RESVSP64:
1314 case XFS_IOC_UNRESVSP64: { 1304 case XFS_IOC_UNRESVSP64:
1305 case XFS_IOC_ZERO_RANGE: {
1315 xfs_flock64_t bf; 1306 xfs_flock64_t bf;
1316 1307
1317 if (copy_from_user(&bf, arg, sizeof(bf))) 1308 if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 52ed49e6465c..b3486dfa5520 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -28,12 +28,8 @@
28#include "xfs_trans.h" 28#include "xfs_trans.h"
29#include "xfs_sb.h" 29#include "xfs_sb.h"
30#include "xfs_ag.h" 30#include "xfs_ag.h"
31#include "xfs_dir2.h"
32#include "xfs_dmapi.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_vnode.h" 33#include "xfs_vnode.h"
38#include "xfs_dinode.h" 34#include "xfs_dinode.h"
39#include "xfs_inode.h" 35#include "xfs_inode.h"
@@ -168,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
168 get_user(bstat->bs_extsize, &bstat32->bs_extsize) || 164 get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
169 get_user(bstat->bs_extents, &bstat32->bs_extents) || 165 get_user(bstat->bs_extents, &bstat32->bs_extents) ||
170 get_user(bstat->bs_gen, &bstat32->bs_gen) || 166 get_user(bstat->bs_gen, &bstat32->bs_gen) ||
171 get_user(bstat->bs_projid, &bstat32->bs_projid) || 167 get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
168 get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
172 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || 169 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
173 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || 170 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
174 get_user(bstat->bs_aextents, &bstat32->bs_aextents)) 171 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -222,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
222 put_user(buffer->bs_extents, &p32->bs_extents) || 219 put_user(buffer->bs_extents, &p32->bs_extents) ||
223 put_user(buffer->bs_gen, &p32->bs_gen) || 220 put_user(buffer->bs_gen, &p32->bs_gen) ||
224 put_user(buffer->bs_projid, &p32->bs_projid) || 221 put_user(buffer->bs_projid, &p32->bs_projid) ||
222 put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
225 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 223 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
226 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 224 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
227 put_user(buffer->bs_aextents, &p32->bs_aextents)) 225 put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -544,7 +542,7 @@ xfs_file_compat_ioctl(
544 if (filp->f_mode & FMODE_NOCMTIME) 542 if (filp->f_mode & FMODE_NOCMTIME)
545 ioflags |= IO_INVIS; 543 ioflags |= IO_INVIS;
546 544
547 xfs_itrace_entry(ip); 545 trace_xfs_file_compat_ioctl(ip);
548 546
549 switch (cmd) { 547 switch (cmd) {
550 /* No size or alignment issues on any arch */ 548 /* No size or alignment issues on any arch */
@@ -578,6 +576,7 @@ xfs_file_compat_ioctl(
578 case XFS_IOC_FSGEOMETRY_V1: 576 case XFS_IOC_FSGEOMETRY_V1:
579 case XFS_IOC_FSGROWFSDATA: 577 case XFS_IOC_FSGROWFSDATA:
580 case XFS_IOC_FSGROWFSRT: 578 case XFS_IOC_FSGROWFSRT:
579 case XFS_IOC_ZERO_RANGE:
581 return xfs_file_ioctl(filp, cmd, p); 580 return xfs_file_ioctl(filp, cmd, p);
582#else 581#else
583 case XFS_IOC_ALLOCSP_32: 582 case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 1024c4f8ba0d..08b605792a99 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
65 __s32 bs_extsize; /* extent size */ 65 __s32 bs_extsize; /* extent size */
66 __s32 bs_extents; /* number of extents */ 66 __s32 bs_extents; /* number of extents */
67 __u32 bs_gen; /* generation count */ 67 __u32 bs_gen; /* generation count */
68 __u16 bs_projid; /* project id */ 68 __u16 bs_projid_lo; /* lower part of project id */
69 unsigned char bs_pad[14]; /* pad space, unused */ 69#define bs_projid bs_projid_lo /* (previously just bs_projid) */
70 __u16 bs_projid_hi; /* high part of project id */
71 unsigned char bs_pad[12]; /* pad space, unused */
70 __u32 bs_dmevmask; /* DMIG event mask */ 72 __u32 bs_dmevmask; /* DMIG event mask */
71 __u16 bs_dmstate; /* DMIG state info */ 73 __u16 bs_dmstate; /* DMIG state info */
72 __u16 bs_aextents; /* attribute number of extents */ 74 __u16 bs_aextents; /* attribute number of extents */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 44f0b2de153e..96107efc0c61 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -24,21 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_alloc.h" 27#include "xfs_alloc.h"
29#include "xfs_dmapi.h"
30#include "xfs_quota.h" 28#include "xfs_quota.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 31#include "xfs_dinode.h"
38#include "xfs_inode.h" 32#include "xfs_inode.h"
39#include "xfs_bmap.h" 33#include "xfs_bmap.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_itable.h" 36#include "xfs_itable.h"
@@ -88,7 +80,7 @@ xfs_mark_inode_dirty_sync(
88{ 80{
89 struct inode *inode = VFS_I(ip); 81 struct inode *inode = VFS_I(ip);
90 82
91 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 83 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
92 mark_inode_dirty_sync(inode); 84 mark_inode_dirty_sync(inode);
93} 85}
94 86
@@ -98,46 +90,11 @@ xfs_mark_inode_dirty(
98{ 90{
99 struct inode *inode = VFS_I(ip); 91 struct inode *inode = VFS_I(ip);
100 92
101 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 93 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
102 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
103} 95}
104 96
105/* 97/*
106 * Change the requested timestamp in the given inode.
107 * We don't lock across timestamp updates, and we don't log them but
108 * we do record the fact that there is dirty information in core.
109 */
110void
111xfs_ichgtime(
112 xfs_inode_t *ip,
113 int flags)
114{
115 struct inode *inode = VFS_I(ip);
116 timespec_t tv;
117 int sync_it = 0;
118
119 tv = current_fs_time(inode->i_sb);
120
121 if ((flags & XFS_ICHGTIME_MOD) &&
122 !timespec_equal(&inode->i_mtime, &tv)) {
123 inode->i_mtime = tv;
124 sync_it = 1;
125 }
126 if ((flags & XFS_ICHGTIME_CHG) &&
127 !timespec_equal(&inode->i_ctime, &tv)) {
128 inode->i_ctime = tv;
129 sync_it = 1;
130 }
131
132 /*
133 * Update complete - now make sure everyone knows that the inode
134 * is dirty.
135 */
136 if (sync_it)
137 xfs_mark_inode_dirty_sync(ip);
138}
139
140/*
141 * Hook in SELinux. This is not quite correct yet, what we really need 98 * Hook in SELinux. This is not quite correct yet, what we really need
142 * here (as we do for default ACLs) is a mechanism by which creation of 99 * here (as we do for default ACLs) is a mechanism by which creation of
143 * these attrs can be journalled at inode creation time (along with the 100 * these attrs can be journalled at inode creation time (along with the
@@ -232,7 +189,7 @@ xfs_vn_mknod(
232 } 189 }
233 190
234 xfs_dentry_to_name(&name, dentry); 191 xfs_dentry_to_name(&name, dentry);
235 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); 192 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
236 if (unlikely(error)) 193 if (unlikely(error))
237 goto out_free_acl; 194 goto out_free_acl;
238 195
@@ -360,7 +317,7 @@ xfs_vn_link(
360 if (unlikely(error)) 317 if (unlikely(error))
361 return -error; 318 return -error;
362 319
363 atomic_inc(&inode->i_count); 320 ihold(inode);
364 d_instantiate(dentry, inode); 321 d_instantiate(dentry, inode);
365 return 0; 322 return 0;
366} 323}
@@ -405,7 +362,7 @@ xfs_vn_symlink(
405 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); 362 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
406 xfs_dentry_to_name(&name, dentry); 363 xfs_dentry_to_name(&name, dentry);
407 364
408 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); 365 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
409 if (unlikely(error)) 366 if (unlikely(error))
410 goto out; 367 goto out;
411 368
@@ -496,7 +453,7 @@ xfs_vn_getattr(
496 struct xfs_inode *ip = XFS_I(inode); 453 struct xfs_inode *ip = XFS_I(inode);
497 struct xfs_mount *mp = ip->i_mount; 454 struct xfs_mount *mp = ip->i_mount;
498 455
499 xfs_itrace_entry(ip); 456 trace_xfs_getattr(ip);
500 457
501 if (XFS_FORCED_SHUTDOWN(mp)) 458 if (XFS_FORCED_SHUTDOWN(mp))
502 return XFS_ERROR(EIO); 459 return XFS_ERROR(EIO);
@@ -548,21 +505,6 @@ xfs_vn_setattr(
548 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 505 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
549} 506}
550 507
551/*
552 * block_truncate_page can return an error, but we can't propagate it
553 * at all here. Leave a complaint + stack trace in the syslog because
554 * this could be bad. If it is bad, we need to propagate the error further.
555 */
556STATIC void
557xfs_vn_truncate(
558 struct inode *inode)
559{
560 int error;
561 error = block_truncate_page(inode->i_mapping, inode->i_size,
562 xfs_get_blocks);
563 WARN_ON(error);
564}
565
566STATIC long 508STATIC long
567xfs_vn_fallocate( 509xfs_vn_fallocate(
568 struct inode *inode, 510 struct inode *inode,
@@ -687,7 +629,7 @@ xfs_vn_fiemap(
687 fieinfo->fi_extents_max + 1; 629 fieinfo->fi_extents_max + 1;
688 bm.bmv_count = min_t(__s32, bm.bmv_count, 630 bm.bmv_count = min_t(__s32, bm.bmv_count,
689 (PAGE_SIZE * 16 / sizeof(struct getbmapx))); 631 (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
690 bm.bmv_iflags = BMV_IF_PREALLOC; 632 bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
691 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 633 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
692 bm.bmv_iflags |= BMV_IF_ATTRFORK; 634 bm.bmv_iflags |= BMV_IF_ATTRFORK;
693 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) 635 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
@@ -702,7 +644,6 @@ xfs_vn_fiemap(
702 644
703static const struct inode_operations xfs_inode_operations = { 645static const struct inode_operations xfs_inode_operations = {
704 .check_acl = xfs_check_acl, 646 .check_acl = xfs_check_acl,
705 .truncate = xfs_vn_truncate,
706 .getattr = xfs_vn_getattr, 647 .getattr = xfs_vn_getattr,
707 .setattr = xfs_vn_setattr, 648 .setattr = xfs_vn_setattr,
708 .setxattr = generic_setxattr, 649 .setxattr = generic_setxattr,
@@ -819,7 +760,9 @@ xfs_setup_inode(
819 760
820 inode->i_ino = ip->i_ino; 761 inode->i_ino = ip->i_ino;
821 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
822 inode_add_to_lists(ip->i_mount->m_super, inode); 763
764 inode_sb_list_add(inode);
765 insert_inode_hash(inode);
823 766
824 inode->i_mode = ip->i_d.di_mode; 767 inode->i_mode = ip->i_d.di_mode;
825 inode->i_nlink = ip->i_d.di_nlink; 768 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index facfb323a706..214ddd71ff79 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
71#include <linux/random.h> 71#include <linux/random.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/writeback.h> 73#include <linux/writeback.h>
74#include <linux/capability.h>
74 75
75#include <asm/page.h> 76#include <asm/page.h>
76#include <asm/div64.h> 77#include <asm/div64.h>
@@ -79,15 +80,12 @@
79#include <asm/byteorder.h> 80#include <asm/byteorder.h>
80#include <asm/unaligned.h> 81#include <asm/unaligned.h>
81 82
82#include <xfs_cred.h>
83#include <xfs_vnode.h> 83#include <xfs_vnode.h>
84#include <xfs_stats.h> 84#include <xfs_stats.h>
85#include <xfs_sysctl.h> 85#include <xfs_sysctl.h>
86#include <xfs_iops.h> 86#include <xfs_iops.h>
87#include <xfs_aops.h> 87#include <xfs_aops.h>
88#include <xfs_super.h> 88#include <xfs_super.h>
89#include <xfs_globals.h>
90#include <xfs_fs_subr.h>
91#include <xfs_buf.h> 89#include <xfs_buf.h>
92 90
93/* 91/*
@@ -145,7 +143,7 @@
145#define SYNCHRONIZE() barrier() 143#define SYNCHRONIZE() barrier()
146#define __return_address __builtin_return_address(0) 144#define __return_address __builtin_return_address(0)
147 145
148#define dfltprid 0 146#define XFS_PROJID_DEFAULT 0
149#define MAXPATHLEN 1024 147#define MAXPATHLEN 1024
150 148
151#define MIN(a,b) (min(a,b)) 149#define MIN(a,b) (min(a,b))
@@ -157,8 +155,6 @@
157 */ 155 */
158#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 156#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
159#define xfs_stack_trace() dump_stack() 157#define xfs_stack_trace() dump_stack()
160#define xfs_itruncate_data(ip, off) \
161 (-vmtruncate(VFS_I(ip), (off)))
162 158
163 159
164/* Move the kernel do_div definition off to one side */ 160/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc635..29b9d642e93d 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_dmapi.h"
20#include "xfs_sb.h" 19#include "xfs_sb.h"
21#include "xfs_inum.h" 20#include "xfs_inum.h"
22#include "xfs_log.h" 21#include "xfs_log.h"
@@ -69,15 +68,15 @@ xfs_fs_set_xstate(
69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 68 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
70 return -ENOSYS; 69 return -ENOSYS;
71 70
72 if (uflags & XFS_QUOTA_UDQ_ACCT) 71 if (uflags & FS_QUOTA_UDQ_ACCT)
73 flags |= XFS_UQUOTA_ACCT; 72 flags |= XFS_UQUOTA_ACCT;
74 if (uflags & XFS_QUOTA_PDQ_ACCT) 73 if (uflags & FS_QUOTA_PDQ_ACCT)
75 flags |= XFS_PQUOTA_ACCT; 74 flags |= XFS_PQUOTA_ACCT;
76 if (uflags & XFS_QUOTA_GDQ_ACCT) 75 if (uflags & FS_QUOTA_GDQ_ACCT)
77 flags |= XFS_GQUOTA_ACCT; 76 flags |= XFS_GQUOTA_ACCT;
78 if (uflags & XFS_QUOTA_UDQ_ENFD) 77 if (uflags & FS_QUOTA_UDQ_ENFD)
79 flags |= XFS_UQUOTA_ENFD; 78 flags |= XFS_UQUOTA_ENFD;
80 if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) 79 if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
81 flags |= XFS_OQUOTA_ENFD; 80 flags |= XFS_OQUOTA_ENFD;
82 81
83 switch (op) { 82 switch (op) {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 80938c736c27..cf808782c065 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -25,14 +25,11 @@
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 27#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 28#include "xfs_quota.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 33#include "xfs_dinode.h"
37#include "xfs_inode.h" 34#include "xfs_inode.h"
38#include "xfs_btree.h" 35#include "xfs_btree.h"
@@ -43,12 +40,10 @@
43#include "xfs_error.h" 40#include "xfs_error.h"
44#include "xfs_itable.h" 41#include "xfs_itable.h"
45#include "xfs_fsops.h" 42#include "xfs_fsops.h"
46#include "xfs_rw.h"
47#include "xfs_attr.h" 43#include "xfs_attr.h"
48#include "xfs_buf_item.h" 44#include "xfs_buf_item.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
50#include "xfs_vnodeops.h" 46#include "xfs_vnodeops.h"
51#include "xfs_version.h"
52#include "xfs_log_priv.h" 47#include "xfs_log_priv.h"
53#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
54#include "xfs_filestream.h" 49#include "xfs_filestream.h"
@@ -94,7 +89,6 @@ mempool_t *xfs_ioend_pool;
94#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 89#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
95 * unwritten extent conversion */ 90 * unwritten extent conversion */
96#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ 91#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
97#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
98#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 92#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
99#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 93#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
100#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 94#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
@@ -116,9 +110,6 @@ mempool_t *xfs_ioend_pool;
116#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ 110#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
117#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ 111#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
118#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ 112#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
119#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
120#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
121#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
122#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ 113#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
123#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ 114#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
124 115
@@ -172,15 +163,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
172STATIC int 163STATIC int
173xfs_parseargs( 164xfs_parseargs(
174 struct xfs_mount *mp, 165 struct xfs_mount *mp,
175 char *options, 166 char *options)
176 char **mtpt)
177{ 167{
178 struct super_block *sb = mp->m_super; 168 struct super_block *sb = mp->m_super;
179 char *this_char, *value, *eov; 169 char *this_char, *value, *eov;
180 int dsunit = 0; 170 int dsunit = 0;
181 int dswidth = 0; 171 int dswidth = 0;
182 int iosize = 0; 172 int iosize = 0;
183 int dmapi_implies_ikeep = 1;
184 __uint8_t iosizelog = 0; 173 __uint8_t iosizelog = 0;
185 174
186 /* 175 /*
@@ -243,15 +232,10 @@ xfs_parseargs(
243 if (!mp->m_logname) 232 if (!mp->m_logname)
244 return ENOMEM; 233 return ENOMEM;
245 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 234 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
246 if (!value || !*value) { 235 cmn_err(CE_WARN,
247 cmn_err(CE_WARN, 236 "XFS: %s option not allowed on this system",
248 "XFS: %s option requires an argument", 237 this_char);
249 this_char); 238 return EINVAL;
250 return EINVAL;
251 }
252 *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
253 if (!*mtpt)
254 return ENOMEM;
255 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 239 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
256 if (!value || !*value) { 240 if (!value || !*value) {
257 cmn_err(CE_WARN, 241 cmn_err(CE_WARN,
@@ -288,8 +272,6 @@ xfs_parseargs(
288 mp->m_flags &= ~XFS_MOUNT_GRPID; 272 mp->m_flags &= ~XFS_MOUNT_GRPID;
289 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 273 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
290 mp->m_flags |= XFS_MOUNT_WSYNC; 274 mp->m_flags |= XFS_MOUNT_WSYNC;
291 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
292 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
293 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 275 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
294 mp->m_flags |= XFS_MOUNT_NORECOVERY; 276 mp->m_flags |= XFS_MOUNT_NORECOVERY;
295 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 277 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -329,7 +311,6 @@ xfs_parseargs(
329 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 311 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
330 mp->m_flags |= XFS_MOUNT_IKEEP; 312 mp->m_flags |= XFS_MOUNT_IKEEP;
331 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 313 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
332 dmapi_implies_ikeep = 0;
333 mp->m_flags &= ~XFS_MOUNT_IKEEP; 314 mp->m_flags &= ~XFS_MOUNT_IKEEP;
334 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 315 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
335 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; 316 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
@@ -370,12 +351,6 @@ xfs_parseargs(
370 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 351 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
371 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 352 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
372 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
373 } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
374 mp->m_flags |= XFS_MOUNT_DMAPI;
375 } else if (!strcmp(this_char, MNTOPT_XDSM)) {
376 mp->m_flags |= XFS_MOUNT_DMAPI;
377 } else if (!strcmp(this_char, MNTOPT_DMI)) {
378 mp->m_flags |= XFS_MOUNT_DMAPI;
379 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
380 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 mp->m_flags |= XFS_MOUNT_DELAYLOG;
381 cmn_err(CE_WARN, 356 cmn_err(CE_WARN,
@@ -387,9 +362,11 @@ xfs_parseargs(
387 cmn_err(CE_WARN, 362 cmn_err(CE_WARN,
388 "XFS: ihashsize no longer used, option is deprecated."); 363 "XFS: ihashsize no longer used, option is deprecated.");
389 } else if (!strcmp(this_char, "osyncisdsync")) { 364 } else if (!strcmp(this_char, "osyncisdsync")) {
390 /* no-op, this is now the default */
391 cmn_err(CE_WARN, 365 cmn_err(CE_WARN,
392 "XFS: osyncisdsync is now the default, option is deprecated."); 366 "XFS: osyncisdsync has no effect, option is deprecated.");
367 } else if (!strcmp(this_char, "osyncisosync")) {
368 cmn_err(CE_WARN,
369 "XFS: osyncisosync has no effect, option is deprecated.");
393 } else if (!strcmp(this_char, "irixsgid")) { 370 } else if (!strcmp(this_char, "irixsgid")) {
394 cmn_err(CE_WARN, 371 cmn_err(CE_WARN,
395 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 372 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
@@ -430,12 +407,6 @@ xfs_parseargs(
430 return EINVAL; 407 return EINVAL;
431 } 408 }
432 409
433 if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
434 printk("XFS: %s option needs the mount point option as well\n",
435 MNTOPT_DMAPI);
436 return EINVAL;
437 }
438
439 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 410 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
440 cmn_err(CE_WARN, 411 cmn_err(CE_WARN,
441 "XFS: sunit and swidth must be specified together"); 412 "XFS: sunit and swidth must be specified together");
@@ -449,18 +420,6 @@ xfs_parseargs(
449 return EINVAL; 420 return EINVAL;
450 } 421 }
451 422
452 /*
453 * Applications using DMI filesystems often expect the
454 * inode generation number to be monotonically increasing.
455 * If we delete inode chunks we break this assumption, so
456 * keep unused inode chunks on disk for DMI filesystems
457 * until we come up with a better solution.
458 * Note that if "ikeep" or "noikeep" mount options are
459 * supplied, then they are honored.
460 */
461 if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
462 mp->m_flags |= XFS_MOUNT_IKEEP;
463
464done: 423done:
465 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { 424 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
466 /* 425 /*
@@ -539,10 +498,8 @@ xfs_showargs(
539 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, 498 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
540 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 499 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
541 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 500 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
542 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
543 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 501 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
544 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 502 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
545 { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
546 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 503 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
547 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, 504 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
548 { 0, NULL } 505 { 0, NULL }
@@ -619,7 +576,7 @@ xfs_max_file_offset(
619 576
620 /* Figure out maximum filesize, on Linux this can depend on 577 /* Figure out maximum filesize, on Linux this can depend on
621 * the filesystem blocksize (on 32 bit platforms). 578 * the filesystem blocksize (on 32 bit platforms).
622 * __block_prepare_write does this in an [unsigned] long... 579 * __block_write_begin does this in an [unsigned] long...
623 * page->index << (PAGE_CACHE_SHIFT - bbits) 580 * page->index << (PAGE_CACHE_SHIFT - bbits)
624 * So, for page sized blocks (4K on 32 bit platforms), 581 * So, for page sized blocks (4K on 32 bit platforms),
625 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is 582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
@@ -687,7 +644,7 @@ xfs_barrier_test(
687 XFS_BUF_ORDERED(sbp); 644 XFS_BUF_ORDERED(sbp);
688 645
689 xfsbdstrat(mp, sbp); 646 xfsbdstrat(mp, sbp);
690 error = xfs_iowait(sbp); 647 error = xfs_buf_iowait(sbp);
691 648
692 /* 649 /*
693 * Clear all the flags we set and possible error state in the 650 * Clear all the flags we set and possible error state in the
@@ -735,8 +692,7 @@ void
735xfs_blkdev_issue_flush( 692xfs_blkdev_issue_flush(
736 xfs_buftarg_t *buftarg) 693 xfs_buftarg_t *buftarg)
737{ 694{
738 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, 695 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
739 BLKDEV_IFL_WAIT);
740} 696}
741 697
742STATIC void 698STATIC void
@@ -800,18 +756,20 @@ xfs_open_devices(
800 * Setup xfs_mount buffer target pointers 756 * Setup xfs_mount buffer target pointers
801 */ 757 */
802 error = ENOMEM; 758 error = ENOMEM;
803 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); 759 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
804 if (!mp->m_ddev_targp) 760 if (!mp->m_ddev_targp)
805 goto out_close_rtdev; 761 goto out_close_rtdev;
806 762
807 if (rtdev) { 763 if (rtdev) {
808 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); 764 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
765 mp->m_fsname);
809 if (!mp->m_rtdev_targp) 766 if (!mp->m_rtdev_targp)
810 goto out_free_ddev_targ; 767 goto out_free_ddev_targ;
811 } 768 }
812 769
813 if (logdev && logdev != ddev) { 770 if (logdev && logdev != ddev) {
814 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); 771 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
772 mp->m_fsname);
815 if (!mp->m_logdev_targp) 773 if (!mp->m_logdev_targp)
816 goto out_free_rtdev_targ; 774 goto out_free_rtdev_targ;
817 } else { 775 } else {
@@ -947,7 +905,7 @@ xfs_fs_destroy_inode(
947{ 905{
948 struct xfs_inode *ip = XFS_I(inode); 906 struct xfs_inode *ip = XFS_I(inode);
949 907
950 xfs_itrace_entry(ip); 908 trace_xfs_destroy_inode(ip);
951 909
952 XFS_STATS_INC(vn_reclaim); 910 XFS_STATS_INC(vn_reclaim);
953 911
@@ -1014,12 +972,7 @@ xfs_fs_inode_init_once(
1014 972
1015/* 973/*
1016 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 974 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
1017 * we catch unlogged VFS level updates to the inode. Care must be taken 975 * we catch unlogged VFS level updates to the inode.
1018 * here - the transaction code calls mark_inode_dirty_sync() to mark the
1019 * VFS inode dirty in a transaction and clears the i_update_core field;
1020 * it must clear the field after calling mark_inode_dirty_sync() to
1021 * correctly indicate that the dirty state has been propagated into the
1022 * inode log item.
1023 * 976 *
1024 * We need the barrier() to maintain correct ordering between unlogged 977 * We need the barrier() to maintain correct ordering between unlogged
1025 * updates and the transaction commit code that clears the i_update_core 978 * updates and the transaction commit code that clears the i_update_core
@@ -1063,10 +1016,8 @@ xfs_log_inode(
1063 * an inode in another recent transaction. So we play it safe and 1016 * an inode in another recent transaction. So we play it safe and
1064 * fire off the transaction anyway. 1017 * fire off the transaction anyway.
1065 */ 1018 */
1066 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1019 xfs_trans_ijoin(tp, ip);
1067 xfs_trans_ihold(tp, ip);
1068 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1020 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1069 xfs_trans_set_sync(tp);
1070 error = xfs_trans_commit(tp, 0); 1021 error = xfs_trans_commit(tp, 0);
1071 xfs_ilock_demote(ip, XFS_ILOCK_EXCL); 1022 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1072 1023
@@ -1082,27 +1033,18 @@ xfs_fs_write_inode(
1082 struct xfs_mount *mp = ip->i_mount; 1033 struct xfs_mount *mp = ip->i_mount;
1083 int error = EAGAIN; 1034 int error = EAGAIN;
1084 1035
1085 xfs_itrace_entry(ip); 1036 trace_xfs_write_inode(ip);
1086 1037
1087 if (XFS_FORCED_SHUTDOWN(mp)) 1038 if (XFS_FORCED_SHUTDOWN(mp))
1088 return XFS_ERROR(EIO); 1039 return XFS_ERROR(EIO);
1089 1040
1090 if (wbc->sync_mode == WB_SYNC_ALL) { 1041 if (wbc->sync_mode == WB_SYNC_ALL) {
1091 /* 1042 /*
1092 * Make sure the inode has hit stable storage. By using the 1043 * Make sure the inode has made it it into the log. Instead
1093 * log and the fsync transactions we reduce the IOs we have 1044 * of forcing it all the way to stable storage using a
1094 * to do here from two (log and inode) to just the log. 1045 * synchronous transaction we let the log force inside the
1095 * 1046 * ->sync_fs call do that for thus, which reduces the number
1096 * Note: We still need to do a delwri write of the inode after 1047 * of synchronous log foces dramatically.
1097 * this to flush it to the backing buffer so that bulkstat
1098 * works properly if this is the first time the inode has been
1099 * written. Because we hold the ilock atomically over the
1100 * transaction commit and the inode flush we are guaranteed
1101 * that the inode is not pinned when it returns. If the flush
1102 * lock is already held, then the inode has already been
1103 * flushed once and we don't need to flush it again. Hence
1104 * the code will only flush the inode if it isn't already
1105 * being flushed.
1106 */ 1048 */
1107 xfs_ioend_wait(ip); 1049 xfs_ioend_wait(ip);
1108 xfs_ilock(ip, XFS_ILOCK_SHARED); 1050 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1116,27 +1058,29 @@ xfs_fs_write_inode(
1116 * We make this non-blocking if the inode is contended, return 1058 * We make this non-blocking if the inode is contended, return
1117 * EAGAIN to indicate to the caller that they did not succeed. 1059 * EAGAIN to indicate to the caller that they did not succeed.
1118 * This prevents the flush path from blocking on inodes inside 1060 * This prevents the flush path from blocking on inodes inside
1119 * another operation right now, they get caught later by xfs_sync. 1061 * another operation right now, they get caught later by
1062 * xfs_sync.
1120 */ 1063 */
1121 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1064 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1122 goto out; 1065 goto out;
1123 }
1124 1066
1125 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1067 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1126 goto out_unlock; 1068 goto out_unlock;
1127 1069
1128 /* 1070 /*
1129 * Now we have the flush lock and the inode is not pinned, we can check 1071 * Now we have the flush lock and the inode is not pinned, we
1130 * if the inode is really clean as we know that there are no pending 1072 * can check if the inode is really clean as we know that
1131 * transaction completions, it is not waiting on the delayed write 1073 * there are no pending transaction completions, it is not
1132 * queue and there is no IO in progress. 1074 * waiting on the delayed write queue and there is no IO in
1133 */ 1075 * progress.
1134 if (xfs_inode_clean(ip)) { 1076 */
1135 xfs_ifunlock(ip); 1077 if (xfs_inode_clean(ip)) {
1136 error = 0; 1078 xfs_ifunlock(ip);
1137 goto out_unlock; 1079 error = 0;
1080 goto out_unlock;
1081 }
1082 error = xfs_iflush(ip, 0);
1138 } 1083 }
1139 error = xfs_iflush(ip, 0);
1140 1084
1141 out_unlock: 1085 out_unlock:
1142 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1086 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1151,12 +1095,15 @@ xfs_fs_write_inode(
1151} 1095}
1152 1096
1153STATIC void 1097STATIC void
1154xfs_fs_clear_inode( 1098xfs_fs_evict_inode(
1155 struct inode *inode) 1099 struct inode *inode)
1156{ 1100{
1157 xfs_inode_t *ip = XFS_I(inode); 1101 xfs_inode_t *ip = XFS_I(inode);
1158 1102
1159 xfs_itrace_entry(ip); 1103 trace_xfs_evict_inode(ip);
1104
1105 truncate_inode_pages(&inode->i_data, 0);
1106 end_writeback(inode);
1160 XFS_STATS_INC(vn_rele); 1107 XFS_STATS_INC(vn_rele);
1161 XFS_STATS_INC(vn_remove); 1108 XFS_STATS_INC(vn_remove);
1162 XFS_STATS_DEC(vn_active); 1109 XFS_STATS_DEC(vn_active);
@@ -1193,22 +1140,13 @@ xfs_fs_put_super(
1193{ 1140{
1194 struct xfs_mount *mp = XFS_M(sb); 1141 struct xfs_mount *mp = XFS_M(sb);
1195 1142
1143 /*
1144 * Unregister the memory shrinker before we tear down the mount
1145 * structure so we don't have memory reclaim racing with us here.
1146 */
1147 xfs_inode_shrinker_unregister(mp);
1196 xfs_syncd_stop(mp); 1148 xfs_syncd_stop(mp);
1197 1149
1198 if (!(sb->s_flags & MS_RDONLY)) {
1199 /*
1200 * XXX(hch): this should be SYNC_WAIT.
1201 *
1202 * Or more likely not needed at all because the VFS is already
1203 * calling ->sync_fs after shutting down all filestem
1204 * operations and just before calling ->put_super.
1205 */
1206 xfs_sync_data(mp, 0);
1207 xfs_sync_attr(mp, 0);
1208 }
1209
1210 XFS_SEND_PREUNMOUNT(mp);
1211
1212 /* 1150 /*
1213 * Blow away any referenced inode in the filestreams cache. 1151 * Blow away any referenced inode in the filestreams cache.
1214 * This can and will cause log traffic as inodes go inactive 1152 * This can and will cause log traffic as inodes go inactive
@@ -1218,14 +1156,10 @@ xfs_fs_put_super(
1218 1156
1219 XFS_bflush(mp->m_ddev_targp); 1157 XFS_bflush(mp->m_ddev_targp);
1220 1158
1221 XFS_SEND_UNMOUNT(mp);
1222
1223 xfs_unmountfs(mp); 1159 xfs_unmountfs(mp);
1224 xfs_freesb(mp); 1160 xfs_freesb(mp);
1225 xfs_inode_shrinker_unregister(mp);
1226 xfs_icsb_destroy_counters(mp); 1161 xfs_icsb_destroy_counters(mp);
1227 xfs_close_devices(mp); 1162 xfs_close_devices(mp);
1228 xfs_dmops_put(mp);
1229 xfs_free_fsname(mp); 1163 xfs_free_fsname(mp);
1230 kfree(mp); 1164 kfree(mp);
1231} 1165}
@@ -1287,6 +1221,7 @@ xfs_fs_statfs(
1287 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1221 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1288 __uint64_t fakeinos, id; 1222 __uint64_t fakeinos, id;
1289 xfs_extlen_t lsize; 1223 xfs_extlen_t lsize;
1224 __int64_t ffree;
1290 1225
1291 statp->f_type = XFS_SB_MAGIC; 1226 statp->f_type = XFS_SB_MAGIC;
1292 statp->f_namelen = MAXNAMELEN - 1; 1227 statp->f_namelen = MAXNAMELEN - 1;
@@ -1310,7 +1245,11 @@ xfs_fs_statfs(
1310 statp->f_files = min_t(typeof(statp->f_files), 1245 statp->f_files = min_t(typeof(statp->f_files),
1311 statp->f_files, 1246 statp->f_files,
1312 mp->m_maxicount); 1247 mp->m_maxicount);
1313 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1248
1249 /* make sure statp->f_ffree does not underflow */
1250 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1251 statp->f_ffree = max_t(__int64_t, ffree, 0);
1252
1314 spin_unlock(&mp->m_sb_lock); 1253 spin_unlock(&mp->m_sb_lock);
1315 1254
1316 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || 1255 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1463,7 +1402,7 @@ xfs_fs_freeze(
1463 1402
1464 xfs_save_resvblks(mp); 1403 xfs_save_resvblks(mp);
1465 xfs_quiesce_attr(mp); 1404 xfs_quiesce_attr(mp);
1466 return -xfs_fs_log_dummy(mp); 1405 return -xfs_fs_log_dummy(mp, SYNC_WAIT);
1467} 1406}
1468 1407
1469STATIC int 1408STATIC int
@@ -1543,7 +1482,6 @@ xfs_fs_fill_super(
1543 struct inode *root; 1482 struct inode *root;
1544 struct xfs_mount *mp = NULL; 1483 struct xfs_mount *mp = NULL;
1545 int flags = 0, error = ENOMEM; 1484 int flags = 0, error = ENOMEM;
1546 char *mtpt = NULL;
1547 1485
1548 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1486 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1549 if (!mp) 1487 if (!mp)
@@ -1559,7 +1497,7 @@ xfs_fs_fill_super(
1559 mp->m_super = sb; 1497 mp->m_super = sb;
1560 sb->s_fs_info = mp; 1498 sb->s_fs_info = mp;
1561 1499
1562 error = xfs_parseargs(mp, (char *)data, &mtpt); 1500 error = xfs_parseargs(mp, (char *)data);
1563 if (error) 1501 if (error)
1564 goto out_free_fsname; 1502 goto out_free_fsname;
1565 1503
@@ -1571,19 +1509,16 @@ xfs_fs_fill_super(
1571#endif 1509#endif
1572 sb->s_op = &xfs_super_operations; 1510 sb->s_op = &xfs_super_operations;
1573 1511
1574 error = xfs_dmops_get(mp);
1575 if (error)
1576 goto out_free_fsname;
1577
1578 if (silent) 1512 if (silent)
1579 flags |= XFS_MFSI_QUIET; 1513 flags |= XFS_MFSI_QUIET;
1580 1514
1581 error = xfs_open_devices(mp); 1515 error = xfs_open_devices(mp);
1582 if (error) 1516 if (error)
1583 goto out_put_dmops; 1517 goto out_free_fsname;
1584 1518
1585 if (xfs_icsb_init_counters(mp)) 1519 error = xfs_icsb_init_counters(mp);
1586 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1520 if (error)
1521 goto out_close_devices;
1587 1522
1588 error = xfs_readsb(mp, flags); 1523 error = xfs_readsb(mp, flags);
1589 if (error) 1524 if (error)
@@ -1608,8 +1543,6 @@ xfs_fs_fill_super(
1608 if (error) 1543 if (error)
1609 goto out_filestream_unmount; 1544 goto out_filestream_unmount;
1610 1545
1611 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
1612
1613 sb->s_magic = XFS_SB_MAGIC; 1546 sb->s_magic = XFS_SB_MAGIC;
1614 sb->s_blocksize = mp->m_sb.sb_blocksize; 1547 sb->s_blocksize = mp->m_sb.sb_blocksize;
1615 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1548 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1638,7 +1571,6 @@ xfs_fs_fill_super(
1638 1571
1639 xfs_inode_shrinker_register(mp); 1572 xfs_inode_shrinker_register(mp);
1640 1573
1641 kfree(mtpt);
1642 return 0; 1574 return 0;
1643 1575
1644 out_filestream_unmount: 1576 out_filestream_unmount:
@@ -1647,12 +1579,10 @@ xfs_fs_fill_super(
1647 xfs_freesb(mp); 1579 xfs_freesb(mp);
1648 out_destroy_counters: 1580 out_destroy_counters:
1649 xfs_icsb_destroy_counters(mp); 1581 xfs_icsb_destroy_counters(mp);
1582 out_close_devices:
1650 xfs_close_devices(mp); 1583 xfs_close_devices(mp);
1651 out_put_dmops:
1652 xfs_dmops_put(mp);
1653 out_free_fsname: 1584 out_free_fsname:
1654 xfs_free_fsname(mp); 1585 xfs_free_fsname(mp);
1655 kfree(mtpt);
1656 kfree(mp); 1586 kfree(mp);
1657 out: 1587 out:
1658 return -error; 1588 return -error;
@@ -1696,7 +1626,7 @@ static const struct super_operations xfs_super_operations = {
1696 .destroy_inode = xfs_fs_destroy_inode, 1626 .destroy_inode = xfs_fs_destroy_inode,
1697 .dirty_inode = xfs_fs_dirty_inode, 1627 .dirty_inode = xfs_fs_dirty_inode,
1698 .write_inode = xfs_fs_write_inode, 1628 .write_inode = xfs_fs_write_inode,
1699 .clear_inode = xfs_fs_clear_inode, 1629 .evict_inode = xfs_fs_evict_inode,
1700 .put_super = xfs_fs_put_super, 1630 .put_super = xfs_fs_put_super,
1701 .sync_fs = xfs_fs_sync_fs, 1631 .sync_fs = xfs_fs_sync_fs,
1702 .freeze_fs = xfs_fs_freeze, 1632 .freeze_fs = xfs_fs_freeze,
@@ -1759,6 +1689,12 @@ xfs_init_zones(void)
1759 if (!xfs_trans_zone) 1689 if (!xfs_trans_zone)
1760 goto out_destroy_ifork_zone; 1690 goto out_destroy_ifork_zone;
1761 1691
1692 xfs_log_item_desc_zone =
1693 kmem_zone_init(sizeof(struct xfs_log_item_desc),
1694 "xfs_log_item_desc");
1695 if (!xfs_log_item_desc_zone)
1696 goto out_destroy_trans_zone;
1697
1762 /* 1698 /*
1763 * The size of the zone allocated buf log item is the maximum 1699 * The size of the zone allocated buf log item is the maximum
1764 * size possible under XFS. This wastes a little bit of memory, 1700 * size possible under XFS. This wastes a little bit of memory,
@@ -1768,7 +1704,7 @@ xfs_init_zones(void)
1768 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / 1704 (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
1769 NBWORD) * sizeof(int))), "xfs_buf_item"); 1705 NBWORD) * sizeof(int))), "xfs_buf_item");
1770 if (!xfs_buf_item_zone) 1706 if (!xfs_buf_item_zone)
1771 goto out_destroy_trans_zone; 1707 goto out_destroy_log_item_desc_zone;
1772 1708
1773 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + 1709 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1774 ((XFS_EFD_MAX_FAST_EXTENTS - 1) * 1710 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1805,6 +1741,8 @@ xfs_init_zones(void)
1805 kmem_zone_destroy(xfs_efd_zone); 1741 kmem_zone_destroy(xfs_efd_zone);
1806 out_destroy_buf_item_zone: 1742 out_destroy_buf_item_zone:
1807 kmem_zone_destroy(xfs_buf_item_zone); 1743 kmem_zone_destroy(xfs_buf_item_zone);
1744 out_destroy_log_item_desc_zone:
1745 kmem_zone_destroy(xfs_log_item_desc_zone);
1808 out_destroy_trans_zone: 1746 out_destroy_trans_zone:
1809 kmem_zone_destroy(xfs_trans_zone); 1747 kmem_zone_destroy(xfs_trans_zone);
1810 out_destroy_ifork_zone: 1748 out_destroy_ifork_zone:
@@ -1835,6 +1773,7 @@ xfs_destroy_zones(void)
1835 kmem_zone_destroy(xfs_efi_zone); 1773 kmem_zone_destroy(xfs_efi_zone);
1836 kmem_zone_destroy(xfs_efd_zone); 1774 kmem_zone_destroy(xfs_efd_zone);
1837 kmem_zone_destroy(xfs_buf_item_zone); 1775 kmem_zone_destroy(xfs_buf_item_zone);
1776 kmem_zone_destroy(xfs_log_item_desc_zone);
1838 kmem_zone_destroy(xfs_trans_zone); 1777 kmem_zone_destroy(xfs_trans_zone);
1839 kmem_zone_destroy(xfs_ifork_zone); 1778 kmem_zone_destroy(xfs_ifork_zone);
1840 kmem_zone_destroy(xfs_dabuf_zone); 1779 kmem_zone_destroy(xfs_dabuf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 519618e9279e..50a3266c999e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,23 +56,17 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_DMAPI
60# define XFS_DMAPI_STRING "dmapi support, "
61#else
62# define XFS_DMAPI_STRING
63#endif
64
65#ifdef DEBUG 59#ifdef DEBUG
66# define XFS_DBG_STRING "debug" 60# define XFS_DBG_STRING "debug"
67#else 61#else
68# define XFS_DBG_STRING "no debug" 62# define XFS_DBG_STRING "no debug"
69#endif 63#endif
70 64
65#define XFS_VERSION_STRING "SGI XFS"
71#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ 66#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
72 XFS_SECURITY_STRING \ 67 XFS_SECURITY_STRING \
73 XFS_REALTIME_STRING \ 68 XFS_REALTIME_STRING \
74 XFS_BIGFS_STRING \ 69 XFS_BIGFS_STRING \
75 XFS_DMAPI_STRING \
76 XFS_DBG_STRING /* DBG must be last */ 70 XFS_DBG_STRING /* DBG must be last */
77 71
78struct xfs_inode; 72struct xfs_inode;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a51a07c3a70c..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,67 +24,54 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_inode.h" 29#include "xfs_inode.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_error.h" 31#include "xfs_error.h"
39#include "xfs_mru_cache.h"
40#include "xfs_filestream.h" 32#include "xfs_filestream.h"
41#include "xfs_vnodeops.h" 33#include "xfs_vnodeops.h"
42#include "xfs_utils.h"
43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
45#include "xfs_rw.h"
46#include "xfs_quota.h" 35#include "xfs_quota.h"
47#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_fsops.h"
48 38
49#include <linux/kthread.h> 39#include <linux/kthread.h>
50#include <linux/freezer.h> 40#include <linux/freezer.h>
51 41
42/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between
45 * lookup reduction and stack usage. This is in the reclaim path, so we can't
46 * be too greedy.
47 */
48#define XFS_LOOKUP_BATCH 32
52 49
53STATIC xfs_inode_t * 50STATIC int
54xfs_inode_ag_lookup( 51xfs_inode_ag_walk_grab(
55 struct xfs_mount *mp, 52 struct xfs_inode *ip)
56 struct xfs_perag *pag,
57 uint32_t *first_index,
58 int tag)
59{ 53{
60 int nr_found; 54 struct inode *inode = VFS_I(ip);
61 struct xfs_inode *ip;
62 55
63 /* 56 /* nothing to sync during shutdown */
64 * use a gang lookup to find the next inode in the tree 57 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
65 * as the tree is sparse and a gang lookup walks to find 58 return EFSCORRUPTED;
66 * the number of objects requested. 59
67 */ 60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
68 if (tag == XFS_ICI_NO_TAG) { 61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 62 return ENOENT;
70 (void **)&ip, *first_index, 1); 63
71 } else { 64 /* If we can't grab the inode, it must on it's way to reclaim. */
72 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, 65 if (!igrab(inode))
73 (void **)&ip, *first_index, 1, tag); 66 return ENOENT;
67
68 if (is_bad_inode(inode)) {
69 IRELE(ip);
70 return ENOENT;
74 } 71 }
75 if (!nr_found)
76 return NULL;
77 72
78 /* 73 /* inode is valid */
79 * Update the index for the next lookup. Catch overflows 74 return 0;
80 * into the next AG range which can occur if we have inodes
81 * in the last block of the AG and we are currently
82 * pointing to the last inode.
83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 return NULL;
87 return ip;
88} 75}
89 76
90STATIC int 77STATIC int
@@ -93,49 +80,75 @@ xfs_inode_ag_walk(
93 struct xfs_perag *pag, 80 struct xfs_perag *pag,
94 int (*execute)(struct xfs_inode *ip, 81 int (*execute)(struct xfs_inode *ip,
95 struct xfs_perag *pag, int flags), 82 struct xfs_perag *pag, int flags),
96 int flags, 83 int flags)
97 int tag,
98 int exclusive,
99 int *nr_to_scan)
100{ 84{
101 uint32_t first_index; 85 uint32_t first_index;
102 int last_error = 0; 86 int last_error = 0;
103 int skipped; 87 int skipped;
88 int done;
89 int nr_found;
104 90
105restart: 91restart:
92 done = 0;
106 skipped = 0; 93 skipped = 0;
107 first_index = 0; 94 first_index = 0;
95 nr_found = 0;
108 do { 96 do {
97 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
109 int error = 0; 98 int error = 0;
110 xfs_inode_t *ip; 99 int i;
111 100
112 if (exclusive) 101 read_lock(&pag->pag_ici_lock);
113 write_lock(&pag->pag_ici_lock); 102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
114 else 103 (void **)batch, first_index,
115 read_lock(&pag->pag_ici_lock); 104 XFS_LOOKUP_BATCH);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 105 if (!nr_found) {
117 if (!ip) { 106 read_unlock(&pag->pag_ici_lock);
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
122 break; 107 break;
123 } 108 }
124 109
125 /* execute releases pag->pag_ici_lock */ 110 /*
126 error = execute(ip, pag, flags); 111 * Grab the inodes before we drop the lock. if we found
127 if (error == EAGAIN) { 112 * nothing, nr == 0 and the loop will be skipped.
128 skipped++; 113 */
129 continue; 114 for (i = 0; i < nr_found; i++) {
115 struct xfs_inode *ip = batch[i];
116
117 if (done || xfs_inode_ag_walk_grab(ip))
118 batch[i] = NULL;
119
120 /*
121 * Update the index for the next lookup. Catch overflows
122 * into the next AG range which can occur if we have inodes
123 * in the last block of the AG and we are currently
124 * pointing to the last inode.
125 */
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1;
129 }
130
131 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock);
133
134 for (i = 0; i < nr_found; i++) {
135 if (!batch[i])
136 continue;
137 error = execute(batch[i], pag, flags);
138 IRELE(batch[i]);
139 if (error == EAGAIN) {
140 skipped++;
141 continue;
142 }
143 if (error && last_error != EFSCORRUPTED)
144 last_error = error;
130 } 145 }
131 if (error)
132 last_error = error;
133 146
134 /* bail out if the filesystem is corrupted. */ 147 /* bail out if the filesystem is corrupted. */
135 if (error == EFSCORRUPTED) 148 if (error == EFSCORRUPTED)
136 break; 149 break;
137 150
138 } while ((*nr_to_scan)--); 151 } while (nr_found && !done);
139 152
140 if (skipped) { 153 if (skipped) {
141 delay(1); 154 delay(1);
@@ -144,110 +157,32 @@ restart:
144 return last_error; 157 return last_error;
145} 158}
146 159
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
149 * walk is optimised only to walk AGs with reclaimable inodes in them.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
182int 160int
183xfs_inode_ag_iterator( 161xfs_inode_ag_iterator(
184 struct xfs_mount *mp, 162 struct xfs_mount *mp,
185 int (*execute)(struct xfs_inode *ip, 163 int (*execute)(struct xfs_inode *ip,
186 struct xfs_perag *pag, int flags), 164 struct xfs_perag *pag, int flags),
187 int flags, 165 int flags)
188 int tag,
189 int exclusive,
190 int *nr_to_scan)
191{ 166{
192 struct xfs_perag *pag; 167 struct xfs_perag *pag;
193 int error = 0; 168 int error = 0;
194 int last_error = 0; 169 int last_error = 0;
195 xfs_agnumber_t ag; 170 xfs_agnumber_t ag;
196 int nr;
197 171
198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
199 ag = 0; 172 ag = 0;
200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { 173 while ((pag = xfs_perag_get(mp, ag))) {
201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 174 ag = pag->pag_agno + 1;
202 exclusive, &nr); 175 error = xfs_inode_ag_walk(mp, pag, execute, flags);
203 xfs_perag_put(pag); 176 xfs_perag_put(pag);
204 if (error) { 177 if (error) {
205 last_error = error; 178 last_error = error;
206 if (error == EFSCORRUPTED) 179 if (error == EFSCORRUPTED)
207 break; 180 break;
208 } 181 }
209 if (nr <= 0)
210 break;
211 } 182 }
212 if (nr_to_scan)
213 *nr_to_scan = nr;
214 return XFS_ERROR(last_error); 183 return XFS_ERROR(last_error);
215} 184}
216 185
217/* must be called with pag_ici_lock held and releases it */
218int
219xfs_sync_inode_valid(
220 struct xfs_inode *ip,
221 struct xfs_perag *pag)
222{
223 struct inode *inode = VFS_I(ip);
224 int error = EFSCORRUPTED;
225
226 /* nothing to sync during shutdown */
227 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
228 goto out_unlock;
229
230 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
231 error = ENOENT;
232 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
233 goto out_unlock;
234
235 /* If we can't grab the inode, it must on it's way to reclaim. */
236 if (!igrab(inode))
237 goto out_unlock;
238
239 if (is_bad_inode(inode)) {
240 IRELE(ip);
241 goto out_unlock;
242 }
243
244 /* inode is valid */
245 error = 0;
246out_unlock:
247 read_unlock(&pag->pag_ici_lock);
248 return error;
249}
250
251STATIC int 186STATIC int
252xfs_sync_inode_data( 187xfs_sync_inode_data(
253 struct xfs_inode *ip, 188 struct xfs_inode *ip,
@@ -258,10 +193,6 @@ xfs_sync_inode_data(
258 struct address_space *mapping = inode->i_mapping; 193 struct address_space *mapping = inode->i_mapping;
259 int error = 0; 194 int error = 0;
260 195
261 error = xfs_sync_inode_valid(ip, pag);
262 if (error)
263 return error;
264
265 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 196 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
266 goto out_wait; 197 goto out_wait;
267 198
@@ -278,7 +209,6 @@ xfs_sync_inode_data(
278 out_wait: 209 out_wait:
279 if (flags & SYNC_WAIT) 210 if (flags & SYNC_WAIT)
280 xfs_ioend_wait(ip); 211 xfs_ioend_wait(ip);
281 IRELE(ip);
282 return error; 212 return error;
283} 213}
284 214
@@ -290,10 +220,6 @@ xfs_sync_inode_attr(
290{ 220{
291 int error = 0; 221 int error = 0;
292 222
293 error = xfs_sync_inode_valid(ip, pag);
294 if (error)
295 return error;
296
297 xfs_ilock(ip, XFS_ILOCK_SHARED); 223 xfs_ilock(ip, XFS_ILOCK_SHARED);
298 if (xfs_inode_clean(ip)) 224 if (xfs_inode_clean(ip))
299 goto out_unlock; 225 goto out_unlock;
@@ -312,14 +238,13 @@ xfs_sync_inode_attr(
312 238
313 out_unlock: 239 out_unlock:
314 xfs_iunlock(ip, XFS_ILOCK_SHARED); 240 xfs_iunlock(ip, XFS_ILOCK_SHARED);
315 IRELE(ip);
316 return error; 241 return error;
317} 242}
318 243
319/* 244/*
320 * Write out pagecache data for the whole filesystem. 245 * Write out pagecache data for the whole filesystem.
321 */ 246 */
322int 247STATIC int
323xfs_sync_data( 248xfs_sync_data(
324 struct xfs_mount *mp, 249 struct xfs_mount *mp,
325 int flags) 250 int flags)
@@ -328,8 +253,7 @@ xfs_sync_data(
328 253
329 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 254 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
330 255
331 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 256 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
332 XFS_ICI_NO_TAG, 0, NULL);
333 if (error) 257 if (error)
334 return XFS_ERROR(error); 258 return XFS_ERROR(error);
335 259
@@ -340,48 +264,14 @@ xfs_sync_data(
340/* 264/*
341 * Write out inode metadata (attributes) for the whole filesystem. 265 * Write out inode metadata (attributes) for the whole filesystem.
342 */ 266 */
343int 267STATIC int
344xfs_sync_attr( 268xfs_sync_attr(
345 struct xfs_mount *mp, 269 struct xfs_mount *mp,
346 int flags) 270 int flags)
347{ 271{
348 ASSERT((flags & ~SYNC_WAIT) == 0); 272 ASSERT((flags & ~SYNC_WAIT) == 0);
349 273
350 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 274 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
351 XFS_ICI_NO_TAG, 0, NULL);
352}
353
354STATIC int
355xfs_commit_dummy_trans(
356 struct xfs_mount *mp,
357 uint flags)
358{
359 struct xfs_inode *ip = mp->m_rootip;
360 struct xfs_trans *tp;
361 int error;
362
363 /*
364 * Put a dummy transaction in the log to tell recovery
365 * that all others are OK.
366 */
367 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
368 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
369 if (error) {
370 xfs_trans_cancel(tp, 0);
371 return error;
372 }
373
374 xfs_ilock(ip, XFS_ILOCK_EXCL);
375
376 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
377 xfs_trans_ihold(tp, ip);
378 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
379 error = xfs_trans_commit(tp, 0);
380 xfs_iunlock(ip, XFS_ILOCK_EXCL);
381
382 /* the log force ensures this transaction is pushed to disk */
383 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
384 return error;
385} 275}
386 276
387STATIC int 277STATIC int
@@ -444,7 +334,7 @@ xfs_quiesce_data(
444 334
445 /* mark the log as covered if needed */ 335 /* mark the log as covered if needed */
446 if (xfs_log_need_covered(mp)) 336 if (xfs_log_need_covered(mp))
447 error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); 337 error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
448 338
449 /* flush data-only devices */ 339 /* flush data-only devices */
450 if (mp->m_rtdev_targp) 340 if (mp->m_rtdev_targp)
@@ -575,7 +465,7 @@ xfs_flush_inodes(
575/* 465/*
576 * Every sync period we need to unpin all items, reclaim inodes and sync 466 * Every sync period we need to unpin all items, reclaim inodes and sync
577 * disk quotas. We might need to cover the log to indicate that the 467 * disk quotas. We might need to cover the log to indicate that the
578 * filesystem is idle. 468 * filesystem is idle and not frozen.
579 */ 469 */
580STATIC void 470STATIC void
581xfs_sync_worker( 471xfs_sync_worker(
@@ -589,8 +479,9 @@ xfs_sync_worker(
589 xfs_reclaim_inodes(mp, 0); 479 xfs_reclaim_inodes(mp, 0);
590 /* dgc: errors ignored here */ 480 /* dgc: errors ignored here */
591 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 481 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
592 if (xfs_log_need_covered(mp)) 482 if (mp->m_super->s_frozen == SB_UNFROZEN &&
593 error = xfs_commit_dummy_trans(mp, 0); 483 xfs_log_need_covered(mp))
484 error = xfs_fs_log_dummy(mp, 0);
594 } 485 }
595 mp->m_sync_seq++; 486 mp->m_sync_seq++;
596 wake_up(&mp->m_wait_single_sync_task); 487 wake_up(&mp->m_wait_single_sync_task);
@@ -710,14 +601,11 @@ xfs_inode_set_reclaim_tag(
710 xfs_perag_put(pag); 601 xfs_perag_put(pag);
711} 602}
712 603
713void 604STATIC void
714__xfs_inode_clear_reclaim_tag( 605__xfs_inode_clear_reclaim(
715 xfs_mount_t *mp,
716 xfs_perag_t *pag, 606 xfs_perag_t *pag,
717 xfs_inode_t *ip) 607 xfs_inode_t *ip)
718{ 608{
719 radix_tree_tag_clear(&pag->pag_ici_root,
720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
721 pag->pag_ici_reclaimable--; 609 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) { 610 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */ 611 /* clear the reclaim tag from the perag radix tree */
@@ -731,6 +619,54 @@ __xfs_inode_clear_reclaim_tag(
731 } 619 }
732} 620}
733 621
622void
623__xfs_inode_clear_reclaim_tag(
624 xfs_mount_t *mp,
625 xfs_perag_t *pag,
626 xfs_inode_t *ip)
627{
628 radix_tree_tag_clear(&pag->pag_ici_root,
629 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
630 __xfs_inode_clear_reclaim(pag, ip);
631}
632
633/*
634 * Grab the inode for reclaim exclusively.
635 * Return 0 if we grabbed it, non-zero otherwise.
636 */
637STATIC int
638xfs_reclaim_inode_grab(
639 struct xfs_inode *ip,
640 int flags)
641{
642
643 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic.
645 * The first is a flush lock check, the second is a already in reclaim
646 * check. Only do these checks if we are not going to block on locks.
647 */
648 if ((flags & SYNC_TRYLOCK) &&
649 (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
650 return 1;
651 }
652
653 /*
654 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us.
657 */
658 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */
662 spin_unlock(&ip->i_flags_lock);
663 return 1;
664 }
665 __xfs_iflags_set(ip, XFS_IRECLAIM);
666 spin_unlock(&ip->i_flags_lock);
667 return 0;
668}
669
734/* 670/*
735 * Inodes in different states need to be treated differently, and the return 671 * Inodes in different states need to be treated differently, and the return
736 * value of xfs_iflush is not sufficient to get this right. The following table 672 * value of xfs_iflush is not sufficient to get this right. The following table
@@ -789,23 +725,6 @@ xfs_reclaim_inode(
789{ 725{
790 int error = 0; 726 int error = 0;
791 727
792 /*
793 * The radix tree lock here protects a thread in xfs_iget from racing
794 * with us starting reclaim on the inode. Once we have the
795 * XFS_IRECLAIM flag set it will not touch us.
796 */
797 spin_lock(&ip->i_flags_lock);
798 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
799 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
800 /* ignore as it is already under reclaim */
801 spin_unlock(&ip->i_flags_lock);
802 write_unlock(&pag->pag_ici_lock);
803 return 0;
804 }
805 __xfs_iflags_set(ip, XFS_IRECLAIM);
806 spin_unlock(&ip->i_flags_lock);
807 write_unlock(&pag->pag_ici_lock);
808
809 xfs_ilock(ip, XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_ILOCK_EXCL);
810 if (!xfs_iflock_nowait(ip)) { 729 if (!xfs_iflock_nowait(ip)) {
811 if (!(sync_mode & SYNC_WAIT)) 730 if (!(sync_mode & SYNC_WAIT))
@@ -867,18 +786,161 @@ out:
867reclaim: 786reclaim:
868 xfs_ifunlock(ip); 787 xfs_ifunlock(ip);
869 xfs_iunlock(ip, XFS_ILOCK_EXCL); 788 xfs_iunlock(ip, XFS_ILOCK_EXCL);
870 xfs_ireclaim(ip); 789
790 XFS_STATS_INC(xs_ig_reclaims);
791 /*
792 * Remove the inode from the per-AG radix tree.
793 *
794 * Because radix_tree_delete won't complain even if the item was never
795 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on.
797 */
798 write_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock);
804
805 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate
807 * with inode cache radix tree lookups. This is because the lookup
808 * can reference the inodes in the cache without taking references.
809 *
810 * We make that OK here by ensuring that we wait until the inode is
811 * unlocked after the lookup before we go ahead and free it. We get
812 * both the ilock and the iolock because the code may need to drop the
813 * ilock one but will still hold the iolock.
814 */
815 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
816 xfs_qm_dqdetach(ip);
817 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
818
819 xfs_inode_free(ip);
871 return error; 820 return error;
872 821
873} 822}
874 823
824/*
825 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
826 * corrupted, we still want to try to reclaim all the inodes. If we don't,
827 * then a shut down during filesystem unmount reclaim walk leak all the
828 * unreclaimed inodes.
829 */
830int
831xfs_reclaim_inodes_ag(
832 struct xfs_mount *mp,
833 int flags,
834 int *nr_to_scan)
835{
836 struct xfs_perag *pag;
837 int error = 0;
838 int last_error = 0;
839 xfs_agnumber_t ag;
840 int trylock = flags & SYNC_TRYLOCK;
841 int skipped;
842
843restart:
844 ag = 0;
845 skipped = 0;
846 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
847 unsigned long first_index = 0;
848 int done = 0;
849 int nr_found = 0;
850
851 ag = pag->pag_agno + 1;
852
853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++;
856 continue;
857 }
858 first_index = pag->pag_ici_reclaim_cursor;
859 } else
860 mutex_lock(&pag->pag_ici_reclaim_lock);
861
862 do {
863 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
864 int i;
865
866 write_lock(&pag->pag_ici_lock);
867 nr_found = radix_tree_gang_lookup_tag(
868 &pag->pag_ici_root,
869 (void **)batch, first_index,
870 XFS_LOOKUP_BATCH,
871 XFS_ICI_RECLAIM_TAG);
872 if (!nr_found) {
873 write_unlock(&pag->pag_ici_lock);
874 break;
875 }
876
877 /*
878 * Grab the inodes before we drop the lock. if we found
879 * nothing, nr == 0 and the loop will be skipped.
880 */
881 for (i = 0; i < nr_found; i++) {
882 struct xfs_inode *ip = batch[i];
883
884 if (done || xfs_reclaim_inode_grab(ip, flags))
885 batch[i] = NULL;
886
887 /*
888 * Update the index for the next lookup. Catch
889 * overflows into the next AG range which can
890 * occur if we have inodes in the last block of
891 * the AG and we are currently pointing to the
892 * last inode.
893 */
894 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
895 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
896 done = 1;
897 }
898
899 /* unlock now we've grabbed the inodes. */
900 write_unlock(&pag->pag_ici_lock);
901
902 for (i = 0; i < nr_found; i++) {
903 if (!batch[i])
904 continue;
905 error = xfs_reclaim_inode(batch[i], pag, flags);
906 if (error && last_error != EFSCORRUPTED)
907 last_error = error;
908 }
909
910 *nr_to_scan -= XFS_LOOKUP_BATCH;
911
912 } while (nr_found && !done && *nr_to_scan > 0);
913
914 if (trylock && !done)
915 pag->pag_ici_reclaim_cursor = first_index;
916 else
917 pag->pag_ici_reclaim_cursor = 0;
918 mutex_unlock(&pag->pag_ici_reclaim_lock);
919 xfs_perag_put(pag);
920 }
921
922 /*
923 * if we skipped any AG, and we still have scan count remaining, do
924 * another pass this time using blocking reclaim semantics (i.e
925 * waiting on the reclaim locks and ignoring the reclaim cursors). This
926 * ensure that when we get more reclaimers than AGs we block rather
927 * than spin trying to execute reclaim.
928 */
929 if (trylock && skipped && *nr_to_scan > 0) {
930 trylock = 0;
931 goto restart;
932 }
933 return XFS_ERROR(last_error);
934}
935
875int 936int
876xfs_reclaim_inodes( 937xfs_reclaim_inodes(
877 xfs_mount_t *mp, 938 xfs_mount_t *mp,
878 int mode) 939 int mode)
879{ 940{
880 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 941 int nr_to_scan = INT_MAX;
881 XFS_ICI_RECLAIM_TAG, 1, NULL); 942
943 return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
882} 944}
883 945
884/* 946/*
@@ -900,17 +962,16 @@ xfs_reclaim_inode_shrink(
900 if (!(gfp_mask & __GFP_FS)) 962 if (!(gfp_mask & __GFP_FS))
901 return -1; 963 return -1;
902 964
903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, 965 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 966 /* terminate if we don't exhaust the scan */
905 /* if we don't exhaust the scan, don't bother coming back */
906 if (nr_to_scan > 0) 967 if (nr_to_scan > 0)
907 return -1; 968 return -1;
908 } 969 }
909 970
910 reclaimable = 0; 971 reclaimable = 0;
911 ag = 0; 972 ag = 0;
912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, 973 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
913 XFS_ICI_RECLAIM_TAG))) { 974 ag = pag->pag_agno + 1;
914 reclaimable += pag->pag_ici_reclaimable; 975 reclaimable += pag->pag_ici_reclaimable;
915 xfs_perag_put(pag); 976 xfs_perag_put(pag);
916 } 977 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e28139aaa4aa..32ba6628290c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,9 +35,6 @@ typedef struct xfs_sync_work {
35int xfs_syncd_init(struct xfs_mount *mp); 35int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 36void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags);
40
41int xfs_quiesce_data(struct xfs_mount *mp); 38int xfs_quiesce_data(struct xfs_mount *mp);
42void xfs_quiesce_attr(struct xfs_mount *mp); 39void xfs_quiesce_attr(struct xfs_mount *mp);
43 40
@@ -50,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
50void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 47void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
51 struct xfs_inode *ip); 48 struct xfs_inode *ip);
52 49
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 50int xfs_sync_inode_grab(struct xfs_inode *ip);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 51int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 52 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 53 int flags);
57 54
58void xfs_inode_shrinker_register(struct xfs_mount *mp); 55void xfs_inode_shrinker_register(struct xfs_mount *mp);
59void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 56void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index d12be8470cba..88d25d4aa56e 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -24,17 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 31#include "xfs_dinode.h"
35#include "xfs_inode.h" 32#include "xfs_inode.h"
36#include "xfs_btree.h" 33#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 34#include "xfs_mount.h"
39#include "xfs_ialloc.h" 35#include "xfs_ialloc.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 302820690904..acef2e98c594 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); 127DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
128DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); 129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); 130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -317,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init);
317DEFINE_BUF_EVENT(xfs_buf_free); 317DEFINE_BUF_EVENT(xfs_buf_free);
318DEFINE_BUF_EVENT(xfs_buf_hold); 318DEFINE_BUF_EVENT(xfs_buf_hold);
319DEFINE_BUF_EVENT(xfs_buf_rele); 319DEFINE_BUF_EVENT(xfs_buf_rele);
320DEFINE_BUF_EVENT(xfs_buf_pin);
321DEFINE_BUF_EVENT(xfs_buf_unpin);
322DEFINE_BUF_EVENT(xfs_buf_iodone); 320DEFINE_BUF_EVENT(xfs_buf_iodone);
323DEFINE_BUF_EVENT(xfs_buf_iorequest); 321DEFINE_BUF_EVENT(xfs_buf_iorequest);
324DEFINE_BUF_EVENT(xfs_buf_bawrite); 322DEFINE_BUF_EVENT(xfs_buf_bawrite);
@@ -327,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
327DEFINE_BUF_EVENT(xfs_buf_lock_done); 325DEFINE_BUF_EVENT(xfs_buf_lock_done);
328DEFINE_BUF_EVENT(xfs_buf_cond_lock); 326DEFINE_BUF_EVENT(xfs_buf_cond_lock);
329DEFINE_BUF_EVENT(xfs_buf_unlock); 327DEFINE_BUF_EVENT(xfs_buf_unlock);
330DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
331DEFINE_BUF_EVENT(xfs_buf_iowait); 328DEFINE_BUF_EVENT(xfs_buf_iowait);
332DEFINE_BUF_EVENT(xfs_buf_iowait_done); 329DEFINE_BUF_EVENT(xfs_buf_iowait_done);
333DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 330DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
334DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); 331DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
335DEFINE_BUF_EVENT(xfs_buf_delwri_split); 332DEFINE_BUF_EVENT(xfs_buf_delwri_split);
336DEFINE_BUF_EVENT(xfs_buf_get_noaddr); 333DEFINE_BUF_EVENT(xfs_buf_get_uncached);
337DEFINE_BUF_EVENT(xfs_bdstrat_shut); 334DEFINE_BUF_EVENT(xfs_bdstrat_shut);
338DEFINE_BUF_EVENT(xfs_buf_item_relse); 335DEFINE_BUF_EVENT(xfs_buf_item_relse);
339DEFINE_BUF_EVENT(xfs_buf_item_iodone); 336DEFINE_BUF_EVENT(xfs_buf_item_iodone);
@@ -541,7 +538,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait);
541DEFINE_LOCK_EVENT(xfs_ilock_demote); 538DEFINE_LOCK_EVENT(xfs_ilock_demote);
542DEFINE_LOCK_EVENT(xfs_iunlock); 539DEFINE_LOCK_EVENT(xfs_iunlock);
543 540
544DECLARE_EVENT_CLASS(xfs_iget_class, 541DECLARE_EVENT_CLASS(xfs_inode_class,
545 TP_PROTO(struct xfs_inode *ip), 542 TP_PROTO(struct xfs_inode *ip),
546 TP_ARGS(ip), 543 TP_ARGS(ip),
547 TP_STRUCT__entry( 544 TP_STRUCT__entry(
@@ -557,16 +554,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class,
557 __entry->ino) 554 __entry->ino)
558) 555)
559 556
560#define DEFINE_IGET_EVENT(name) \ 557#define DEFINE_INODE_EVENT(name) \
561DEFINE_EVENT(xfs_iget_class, name, \ 558DEFINE_EVENT(xfs_inode_class, name, \
562 TP_PROTO(struct xfs_inode *ip), \ 559 TP_PROTO(struct xfs_inode *ip), \
563 TP_ARGS(ip)) 560 TP_ARGS(ip))
564DEFINE_IGET_EVENT(xfs_iget_skip); 561DEFINE_INODE_EVENT(xfs_iget_skip);
565DEFINE_IGET_EVENT(xfs_iget_reclaim); 562DEFINE_INODE_EVENT(xfs_iget_reclaim);
566DEFINE_IGET_EVENT(xfs_iget_found); 563DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
567DEFINE_IGET_EVENT(xfs_iget_alloc); 564DEFINE_INODE_EVENT(xfs_iget_hit);
568 565DEFINE_INODE_EVENT(xfs_iget_miss);
569DECLARE_EVENT_CLASS(xfs_inode_class, 566
567DEFINE_INODE_EVENT(xfs_getattr);
568DEFINE_INODE_EVENT(xfs_setattr);
569DEFINE_INODE_EVENT(xfs_readlink);
570DEFINE_INODE_EVENT(xfs_alloc_file_space);
571DEFINE_INODE_EVENT(xfs_free_file_space);
572DEFINE_INODE_EVENT(xfs_readdir);
573#ifdef CONFIG_XFS_POSIX_ACL
574DEFINE_INODE_EVENT(xfs_check_acl);
575#endif
576DEFINE_INODE_EVENT(xfs_vm_bmap);
577DEFINE_INODE_EVENT(xfs_file_ioctl);
578DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
579DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_file_fsync);
581DEFINE_INODE_EVENT(xfs_destroy_inode);
582DEFINE_INODE_EVENT(xfs_write_inode);
583DEFINE_INODE_EVENT(xfs_evict_inode);
584
585DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
586DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
587
588DECLARE_EVENT_CLASS(xfs_iref_class,
570 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 589 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
571 TP_ARGS(ip, caller_ip), 590 TP_ARGS(ip, caller_ip),
572 TP_STRUCT__entry( 591 TP_STRUCT__entry(
@@ -591,20 +610,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
591 (char *)__entry->caller_ip) 610 (char *)__entry->caller_ip)
592) 611)
593 612
594#define DEFINE_INODE_EVENT(name) \ 613#define DEFINE_IREF_EVENT(name) \
595DEFINE_EVENT(xfs_inode_class, name, \ 614DEFINE_EVENT(xfs_iref_class, name, \
596 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 615 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
597 TP_ARGS(ip, caller_ip)) 616 TP_ARGS(ip, caller_ip))
598DEFINE_INODE_EVENT(xfs_ihold); 617DEFINE_IREF_EVENT(xfs_ihold);
599DEFINE_INODE_EVENT(xfs_irele); 618DEFINE_IREF_EVENT(xfs_irele);
600DEFINE_INODE_EVENT(xfs_inode_pin); 619DEFINE_IREF_EVENT(xfs_inode_pin);
601DEFINE_INODE_EVENT(xfs_inode_unpin); 620DEFINE_IREF_EVENT(xfs_inode_unpin);
602DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); 621DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
622
623DECLARE_EVENT_CLASS(xfs_namespace_class,
624 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
625 TP_ARGS(dp, name),
626 TP_STRUCT__entry(
627 __field(dev_t, dev)
628 __field(xfs_ino_t, dp_ino)
629 __dynamic_array(char, name, name->len)
630 ),
631 TP_fast_assign(
632 __entry->dev = VFS_I(dp)->i_sb->s_dev;
633 __entry->dp_ino = dp->i_ino;
634 memcpy(__get_str(name), name->name, name->len);
635 ),
636 TP_printk("dev %d:%d dp ino 0x%llx name %s",
637 MAJOR(__entry->dev), MINOR(__entry->dev),
638 __entry->dp_ino,
639 __get_str(name))
640)
603 641
604/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 642#define DEFINE_NAMESPACE_EVENT(name) \
605DEFINE_INODE_EVENT(xfs_inode); 643DEFINE_EVENT(xfs_namespace_class, name, \
606#define xfs_itrace_entry(ip) \ 644 TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
607 trace_xfs_inode(ip, _THIS_IP_) 645 TP_ARGS(dp, name))
646DEFINE_NAMESPACE_EVENT(xfs_remove);
647DEFINE_NAMESPACE_EVENT(xfs_link);
648DEFINE_NAMESPACE_EVENT(xfs_lookup);
649DEFINE_NAMESPACE_EVENT(xfs_create);
650DEFINE_NAMESPACE_EVENT(xfs_symlink);
651
652TRACE_EVENT(xfs_rename,
653 TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
654 struct xfs_name *src_name, struct xfs_name *target_name),
655 TP_ARGS(src_dp, target_dp, src_name, target_name),
656 TP_STRUCT__entry(
657 __field(dev_t, dev)
658 __field(xfs_ino_t, src_dp_ino)
659 __field(xfs_ino_t, target_dp_ino)
660 __dynamic_array(char, src_name, src_name->len)
661 __dynamic_array(char, target_name, target_name->len)
662 ),
663 TP_fast_assign(
664 __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
665 __entry->src_dp_ino = src_dp->i_ino;
666 __entry->target_dp_ino = target_dp->i_ino;
667 memcpy(__get_str(src_name), src_name->name, src_name->len);
668 memcpy(__get_str(target_name), target_name->name, target_name->len);
669 ),
670 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
671 " src name %s target name %s",
672 MAJOR(__entry->dev), MINOR(__entry->dev),
673 __entry->src_dp_ino,
674 __entry->target_dp_ino,
675 __get_str(src_name),
676 __get_str(target_name))
677)
608 678
609DECLARE_EVENT_CLASS(xfs_dquot_class, 679DECLARE_EVENT_CLASS(xfs_dquot_class,
610 TP_PROTO(struct xfs_dquot *dqp), 680 TP_PROTO(struct xfs_dquot *dqp),
@@ -684,9 +754,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele);
684DEFINE_DQUOT_EVENT(xfs_dqflush); 754DEFINE_DQUOT_EVENT(xfs_dqflush);
685DEFINE_DQUOT_EVENT(xfs_dqflush_force); 755DEFINE_DQUOT_EVENT(xfs_dqflush_force);
686DEFINE_DQUOT_EVENT(xfs_dqflush_done); 756DEFINE_DQUOT_EVENT(xfs_dqflush_done);
687/* not really iget events, but we re-use the format */
688DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
689DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
690 757
691DECLARE_EVENT_CLASS(xfs_loggrant_class, 758DECLARE_EVENT_CLASS(xfs_loggrant_class,
692 TP_PROTO(struct log *log, struct xlog_ticket *tic), 759 TP_PROTO(struct log *log, struct xlog_ticket *tic),
@@ -834,33 +901,29 @@ DECLARE_EVENT_CLASS(xfs_page_class,
834 __field(loff_t, size) 901 __field(loff_t, size)
835 __field(unsigned long, offset) 902 __field(unsigned long, offset)
836 __field(int, delalloc) 903 __field(int, delalloc)
837 __field(int, unmapped)
838 __field(int, unwritten) 904 __field(int, unwritten)
839 ), 905 ),
840 TP_fast_assign( 906 TP_fast_assign(
841 int delalloc = -1, unmapped = -1, unwritten = -1; 907 int delalloc = -1, unwritten = -1;
842 908
843 if (page_has_buffers(page)) 909 if (page_has_buffers(page))
844 xfs_count_page_state(page, &delalloc, 910 xfs_count_page_state(page, &delalloc, &unwritten);
845 &unmapped, &unwritten);
846 __entry->dev = inode->i_sb->s_dev; 911 __entry->dev = inode->i_sb->s_dev;
847 __entry->ino = XFS_I(inode)->i_ino; 912 __entry->ino = XFS_I(inode)->i_ino;
848 __entry->pgoff = page_offset(page); 913 __entry->pgoff = page_offset(page);
849 __entry->size = i_size_read(inode); 914 __entry->size = i_size_read(inode);
850 __entry->offset = off; 915 __entry->offset = off;
851 __entry->delalloc = delalloc; 916 __entry->delalloc = delalloc;
852 __entry->unmapped = unmapped;
853 __entry->unwritten = unwritten; 917 __entry->unwritten = unwritten;
854 ), 918 ),
855 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " 919 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
856 "delalloc %d unmapped %d unwritten %d", 920 "delalloc %d unwritten %d",
857 MAJOR(__entry->dev), MINOR(__entry->dev), 921 MAJOR(__entry->dev), MINOR(__entry->dev),
858 __entry->ino, 922 __entry->ino,
859 __entry->pgoff, 923 __entry->pgoff,
860 __entry->size, 924 __entry->size,
861 __entry->offset, 925 __entry->offset,
862 __entry->delalloc, 926 __entry->delalloc,
863 __entry->unmapped,
864 __entry->unwritten) 927 __entry->unwritten)
865) 928)
866 929
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
deleted file mode 100644
index f8d279d7563a..000000000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_VERSION_H__
19#define __XFS_VERSION_H__
20
21/*
22 * Dummy file that can contain a timestamp to put into the
23 * XFS init string, to help users keep track of what they're
24 * running
25 */
26
27#define XFS_VERSION_STRING "SGI XFS"
28
29#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 585e7633dfc7..faf8e1a83a12 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 31#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 37#include "xfs_trans_space.h"
@@ -64,8 +54,6 @@
64 flush lock - ditto. 54 flush lock - ditto.
65*/ 55*/
66 56
67STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
68
69#ifdef DEBUG 57#ifdef DEBUG
70xfs_buftarg_t *xfs_dqerror_target; 58xfs_buftarg_t *xfs_dqerror_target;
71int xfs_do_dqerror; 59int xfs_do_dqerror;
@@ -390,21 +378,14 @@ xfs_qm_dqalloc(
390 return (ESRCH); 378 return (ESRCH);
391 } 379 }
392 380
393 /* 381 xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
394 * xfs_trans_commit normally decrements the vnode ref count
395 * when it unlocks the inode. Since we want to keep the quota
396 * inode around, we bump the vnode ref count now.
397 */
398 IHOLD(quotip);
399
400 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
401 nmaps = 1; 382 nmaps = 1;
402 if ((error = xfs_bmapi(tp, quotip, 383 if ((error = xfs_bmapi(tp, quotip,
403 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, 384 offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
404 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, 385 XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
405 &firstblock, 386 &firstblock,
406 XFS_QM_DQALLOC_SPACE_RES(mp), 387 XFS_QM_DQALLOC_SPACE_RES(mp),
407 &map, &nmaps, &flist, NULL))) { 388 &map, &nmaps, &flist))) {
408 goto error0; 389 goto error0;
409 } 390 }
410 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 391 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -482,87 +463,68 @@ xfs_qm_dqtobp(
482 uint flags) 463 uint flags)
483{ 464{
484 xfs_bmbt_irec_t map; 465 xfs_bmbt_irec_t map;
485 int nmaps, error; 466 int nmaps = 1, error;
486 xfs_buf_t *bp; 467 xfs_buf_t *bp;
487 xfs_inode_t *quotip; 468 xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
488 xfs_mount_t *mp; 469 xfs_mount_t *mp = dqp->q_mount;
489 xfs_disk_dquot_t *ddq; 470 xfs_disk_dquot_t *ddq;
490 xfs_dqid_t id; 471 xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
491 boolean_t newdquot;
492 xfs_trans_t *tp = (tpp ? *tpp : NULL); 472 xfs_trans_t *tp = (tpp ? *tpp : NULL);
493 473
494 mp = dqp->q_mount; 474 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
495 id = be32_to_cpu(dqp->q_core.d_id);
496 nmaps = 1;
497 newdquot = B_FALSE;
498 475
499 /* 476 xfs_ilock(quotip, XFS_ILOCK_SHARED);
500 * If we don't know where the dquot lives, find out. 477 if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
501 */
502 if (dqp->q_blkno == (xfs_daddr_t) 0) {
503 /* We use the id as an index */
504 dqp->q_fileoffset = (xfs_fileoff_t)id /
505 mp->m_quotainfo->qi_dqperchunk;
506 nmaps = 1;
507 quotip = XFS_DQ_TO_QIP(dqp);
508 xfs_ilock(quotip, XFS_ILOCK_SHARED);
509 /* 478 /*
510 * Return if this type of quotas is turned off while we didn't 479 * Return if this type of quotas is turned off while we
511 * have an inode lock 480 * didn't have the quota inode lock.
512 */ 481 */
513 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 482 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
514 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 483 return ESRCH;
515 return (ESRCH); 484 }
516 } 485
486 /*
487 * Find the block map; no allocations yet
488 */
489 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
490 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
491 NULL, 0, &map, &nmaps, NULL);
492
493 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
494 if (error)
495 return error;
496
497 ASSERT(nmaps == 1);
498 ASSERT(map.br_blockcount == 1);
499
500 /*
501 * Offset of dquot in the (fixed sized) dquot chunk.
502 */
503 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
504 sizeof(xfs_dqblk_t);
505
506 ASSERT(map.br_startblock != DELAYSTARTBLOCK);
507 if (map.br_startblock == HOLESTARTBLOCK) {
517 /* 508 /*
518 * Find the block map; no allocations yet 509 * We don't allocate unless we're asked to
519 */ 510 */
520 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, 511 if (!(flags & XFS_QMOPT_DQALLOC))
521 XFS_DQUOT_CLUSTER_SIZE_FSB, 512 return ENOENT;
522 XFS_BMAPI_METADATA,
523 NULL, 0, &map, &nmaps, NULL, NULL);
524 513
525 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 514 ASSERT(tp);
515 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
516 dqp->q_fileoffset, &bp);
526 if (error) 517 if (error)
527 return (error); 518 return error;
528 ASSERT(nmaps == 1); 519 tp = *tpp;
529 ASSERT(map.br_blockcount == 1); 520 } else {
521 trace_xfs_dqtobp_read(dqp);
530 522
531 /* 523 /*
532 * offset of dquot in the (fixed sized) dquot chunk. 524 * store the blkno etc so that we don't have to do the
525 * mapping all the time
533 */ 526 */
534 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * 527 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
535 sizeof(xfs_dqblk_t);
536 if (map.br_startblock == HOLESTARTBLOCK) {
537 /*
538 * We don't allocate unless we're asked to
539 */
540 if (!(flags & XFS_QMOPT_DQALLOC))
541 return (ENOENT);
542
543 ASSERT(tp);
544 if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
545 dqp->q_fileoffset, &bp)))
546 return (error);
547 tp = *tpp;
548 newdquot = B_TRUE;
549 } else {
550 /*
551 * store the blkno etc so that we don't have to do the
552 * mapping all the time
553 */
554 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
555 }
556 }
557 ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
558 ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
559
560 /*
561 * Read in the buffer, unless we've just done the allocation
562 * (in which case we already have the buf).
563 */
564 if (!newdquot) {
565 trace_xfs_dqtobp_read(dqp);
566 528
567 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 529 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
568 dqp->q_blkno, 530 dqp->q_blkno,
@@ -571,13 +533,14 @@ xfs_qm_dqtobp(
571 if (error || !bp) 533 if (error || !bp)
572 return XFS_ERROR(error); 534 return XFS_ERROR(error);
573 } 535 }
536
574 ASSERT(XFS_BUF_ISBUSY(bp)); 537 ASSERT(XFS_BUF_ISBUSY(bp));
575 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 538 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
576 539
577 /* 540 /*
578 * calculate the location of the dquot inside the buffer. 541 * calculate the location of the dquot inside the buffer.
579 */ 542 */
580 ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); 543 ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
581 544
582 /* 545 /*
583 * A simple sanity check in case we got a corrupted dquot... 546 * A simple sanity check in case we got a corrupted dquot...
@@ -1141,6 +1104,46 @@ xfs_qm_dqrele(
1141 xfs_qm_dqput(dqp); 1104 xfs_qm_dqput(dqp);
1142} 1105}
1143 1106
1107/*
1108 * This is the dquot flushing I/O completion routine. It is called
1109 * from interrupt level when the buffer containing the dquot is
1110 * flushed to disk. It is responsible for removing the dquot logitem
1111 * from the AIL if it has not been re-logged, and unlocking the dquot's
1112 * flush lock. This behavior is very similar to that of inodes..
1113 */
1114STATIC void
1115xfs_qm_dqflush_done(
1116 struct xfs_buf *bp,
1117 struct xfs_log_item *lip)
1118{
1119 xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
1120 xfs_dquot_t *dqp = qip->qli_dquot;
1121 struct xfs_ail *ailp = lip->li_ailp;
1122
1123 /*
1124 * We only want to pull the item from the AIL if its
1125 * location in the log has not changed since we started the flush.
1126 * Thus, we only bother if the dquot's lsn has
1127 * not changed. First we check the lsn outside the lock
1128 * since it's cheaper, and then we recheck while
1129 * holding the lock before removing the dquot from the AIL.
1130 */
1131 if ((lip->li_flags & XFS_LI_IN_AIL) &&
1132 lip->li_lsn == qip->qli_flush_lsn) {
1133
1134 /* xfs_trans_ail_delete() drops the AIL lock. */
1135 spin_lock(&ailp->xa_lock);
1136 if (lip->li_lsn == qip->qli_flush_lsn)
1137 xfs_trans_ail_delete(ailp, lip);
1138 else
1139 spin_unlock(&ailp->xa_lock);
1140 }
1141
1142 /*
1143 * Release the dq's flush lock since we're done with it.
1144 */
1145 xfs_dqfunlock(dqp);
1146}
1144 1147
1145/* 1148/*
1146 * Write a modified dquot to disk. 1149 * Write a modified dquot to disk.
@@ -1155,18 +1158,18 @@ xfs_qm_dqflush(
1155 xfs_dquot_t *dqp, 1158 xfs_dquot_t *dqp,
1156 uint flags) 1159 uint flags)
1157{ 1160{
1158 xfs_mount_t *mp; 1161 struct xfs_mount *mp = dqp->q_mount;
1159 xfs_buf_t *bp; 1162 struct xfs_buf *bp;
1160 xfs_disk_dquot_t *ddqp; 1163 struct xfs_disk_dquot *ddqp;
1161 int error; 1164 int error;
1162 1165
1163 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1166 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1164 ASSERT(!completion_done(&dqp->q_flush)); 1167 ASSERT(!completion_done(&dqp->q_flush));
1168
1165 trace_xfs_dqflush(dqp); 1169 trace_xfs_dqflush(dqp);
1166 1170
1167 /* 1171 /*
1168 * If not dirty, or it's pinned and we are not supposed to 1172 * If not dirty, or it's pinned and we are not supposed to block, nada.
1169 * block, nada.
1170 */ 1173 */
1171 if (!XFS_DQ_IS_DIRTY(dqp) || 1174 if (!XFS_DQ_IS_DIRTY(dqp) ||
1172 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { 1175 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
@@ -1180,40 +1183,46 @@ xfs_qm_dqflush(
1180 * down forcibly. If that's the case we must not write this dquot 1183 * down forcibly. If that's the case we must not write this dquot
1181 * to disk, because the log record didn't make it to disk! 1184 * to disk, because the log record didn't make it to disk!
1182 */ 1185 */
1183 if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { 1186 if (XFS_FORCED_SHUTDOWN(mp)) {
1184 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1187 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1185 xfs_dqfunlock(dqp); 1188 xfs_dqfunlock(dqp);
1186 return XFS_ERROR(EIO); 1189 return XFS_ERROR(EIO);
1187 } 1190 }
1188 1191
1189 /* 1192 /*
1190 * Get the buffer containing the on-disk dquot 1193 * Get the buffer containing the on-disk dquot
1191 * We don't need a transaction envelope because we know that the
1192 * the ondisk-dquot has already been allocated for.
1193 */ 1194 */
1194 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { 1195 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
1196 mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1197 if (error) {
1195 ASSERT(error != ENOENT); 1198 ASSERT(error != ENOENT);
1196 /*
1197 * Quotas could have gotten turned off (ESRCH)
1198 */
1199 xfs_dqfunlock(dqp); 1199 xfs_dqfunlock(dqp);
1200 return (error); 1200 return error;
1201 } 1201 }
1202 1202
1203 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 1203 /*
1204 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1204 * Calculate the location of the dquot inside the buffer.
1205 xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); 1205 */
1206 ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
1207
1208 /*
1209 * A simple sanity check in case we got a corrupted dquot..
1210 */
1211 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1212 XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
1213 xfs_buf_relse(bp);
1214 xfs_dqfunlock(dqp);
1215 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1206 return XFS_ERROR(EIO); 1216 return XFS_ERROR(EIO);
1207 } 1217 }
1208 1218
1209 /* This is the only portion of data that needs to persist */ 1219 /* This is the only portion of data that needs to persist */
1210 memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); 1220 memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
1211 1221
1212 /* 1222 /*
1213 * Clear the dirty field and remember the flush lsn for later use. 1223 * Clear the dirty field and remember the flush lsn for later use.
1214 */ 1224 */
1215 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1225 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1216 mp = dqp->q_mount;
1217 1226
1218 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, 1227 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1219 &dqp->q_logitem.qli_item.li_lsn); 1228 &dqp->q_logitem.qli_item.li_lsn);
@@ -1222,8 +1231,9 @@ xfs_qm_dqflush(
1222 * Attach an iodone routine so that we can remove this dquot from the 1231 * Attach an iodone routine so that we can remove this dquot from the
1223 * AIL and release the flush lock once the dquot is synced to disk. 1232 * AIL and release the flush lock once the dquot is synced to disk.
1224 */ 1233 */
1225 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) 1234 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
1226 xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); 1235 &dqp->q_logitem.qli_item);
1236
1227 /* 1237 /*
1228 * If the buffer is pinned then push on the log so we won't 1238 * If the buffer is pinned then push on the log so we won't
1229 * get stuck waiting in the write for too long. 1239 * get stuck waiting in the write for too long.
@@ -1247,50 +1257,6 @@ xfs_qm_dqflush(
1247 1257
1248} 1258}
1249 1259
1250/*
1251 * This is the dquot flushing I/O completion routine. It is called
1252 * from interrupt level when the buffer containing the dquot is
1253 * flushed to disk. It is responsible for removing the dquot logitem
1254 * from the AIL if it has not been re-logged, and unlocking the dquot's
1255 * flush lock. This behavior is very similar to that of inodes..
1256 */
1257/*ARGSUSED*/
1258STATIC void
1259xfs_qm_dqflush_done(
1260 xfs_buf_t *bp,
1261 xfs_dq_logitem_t *qip)
1262{
1263 xfs_dquot_t *dqp;
1264 struct xfs_ail *ailp;
1265
1266 dqp = qip->qli_dquot;
1267 ailp = qip->qli_item.li_ailp;
1268
1269 /*
1270 * We only want to pull the item from the AIL if its
1271 * location in the log has not changed since we started the flush.
1272 * Thus, we only bother if the dquot's lsn has
1273 * not changed. First we check the lsn outside the lock
1274 * since it's cheaper, and then we recheck while
1275 * holding the lock before removing the dquot from the AIL.
1276 */
1277 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1278 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1279
1280 /* xfs_trans_ail_delete() drops the AIL lock. */
1281 spin_lock(&ailp->xa_lock);
1282 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1283 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1284 else
1285 spin_unlock(&ailp->xa_lock);
1286 }
1287
1288 /*
1289 * Release the dq's flush lock since we're done with it.
1290 */
1291 xfs_dqfunlock(dqp);
1292}
1293
1294int 1260int
1295xfs_qm_dqlock_nowait( 1261xfs_qm_dqlock_nowait(
1296 xfs_dquot_t *dqp) 1262 xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 8d89a24ae324..2a1f3dc10a02 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,42 +23,36 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_bmap.h" 31#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_rtalloc.h" 32#include "xfs_rtalloc.h"
42#include "xfs_error.h" 33#include "xfs_error.h"
43#include "xfs_itable.h" 34#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
48#include "xfs_qm.h" 38#include "xfs_qm.h"
49 39
40static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
41{
42 return container_of(lip, struct xfs_dq_logitem, qli_item);
43}
44
50/* 45/*
51 * returns the number of iovecs needed to log the given dquot item. 46 * returns the number of iovecs needed to log the given dquot item.
52 */ 47 */
53/* ARGSUSED */
54STATIC uint 48STATIC uint
55xfs_qm_dquot_logitem_size( 49xfs_qm_dquot_logitem_size(
56 xfs_dq_logitem_t *logitem) 50 struct xfs_log_item *lip)
57{ 51{
58 /* 52 /*
59 * we need only two iovecs, one for the format, one for the real thing 53 * we need only two iovecs, one for the format, one for the real thing
60 */ 54 */
61 return (2); 55 return 2;
62} 56}
63 57
64/* 58/*
@@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size(
66 */ 60 */
67STATIC void 61STATIC void
68xfs_qm_dquot_logitem_format( 62xfs_qm_dquot_logitem_format(
69 xfs_dq_logitem_t *logitem, 63 struct xfs_log_item *lip,
70 xfs_log_iovec_t *logvec) 64 struct xfs_log_iovec *logvec)
71{ 65{
72 ASSERT(logitem); 66 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
73 ASSERT(logitem->qli_dquot);
74 67
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 68 logvec->i_addr = &qlip->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 69 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 logvec->i_type = XLOG_REG_TYPE_QFORMAT; 70 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 71 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 72 logvec->i_addr = &qlip->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 73 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 logvec->i_type = XLOG_REG_TYPE_DQUOT; 74 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 75
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 76 ASSERT(2 == lip->li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 77 qlip->qli_format.qlf_size = 2;
85 78
86} 79}
87 80
@@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format(
90 */ 83 */
91STATIC void 84STATIC void
92xfs_qm_dquot_logitem_pin( 85xfs_qm_dquot_logitem_pin(
93 xfs_dq_logitem_t *logitem) 86 struct xfs_log_item *lip)
94{ 87{
95 xfs_dquot_t *dqp = logitem->qli_dquot; 88 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
96 89
97 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 90 ASSERT(XFS_DQ_IS_LOCKED(dqp));
98 atomic_inc(&dqp->q_pincount); 91 atomic_inc(&dqp->q_pincount);
@@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin(
104 * dquot must have been previously pinned with a call to 97 * dquot must have been previously pinned with a call to
105 * xfs_qm_dquot_logitem_pin(). 98 * xfs_qm_dquot_logitem_pin().
106 */ 99 */
107/* ARGSUSED */
108STATIC void 100STATIC void
109xfs_qm_dquot_logitem_unpin( 101xfs_qm_dquot_logitem_unpin(
110 xfs_dq_logitem_t *logitem) 102 struct xfs_log_item *lip,
103 int remove)
111{ 104{
112 xfs_dquot_t *dqp = logitem->qli_dquot; 105 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
113 106
114 ASSERT(atomic_read(&dqp->q_pincount) > 0); 107 ASSERT(atomic_read(&dqp->q_pincount) > 0);
115 if (atomic_dec_and_test(&dqp->q_pincount)) 108 if (atomic_dec_and_test(&dqp->q_pincount))
116 wake_up(&dqp->q_pinwait); 109 wake_up(&dqp->q_pinwait);
117} 110}
118 111
119/* ARGSUSED */
120STATIC void
121xfs_qm_dquot_logitem_unpin_remove(
122 xfs_dq_logitem_t *logitem,
123 xfs_trans_t *tp)
124{
125 xfs_qm_dquot_logitem_unpin(logitem);
126}
127
128/* 112/*
129 * Given the logitem, this writes the corresponding dquot entry to disk 113 * Given the logitem, this writes the corresponding dquot entry to disk
130 * asynchronously. This is called with the dquot entry securely locked; 114 * asynchronously. This is called with the dquot entry securely locked;
@@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove(
133 */ 117 */
134STATIC void 118STATIC void
135xfs_qm_dquot_logitem_push( 119xfs_qm_dquot_logitem_push(
136 xfs_dq_logitem_t *logitem) 120 struct xfs_log_item *lip)
137{ 121{
138 xfs_dquot_t *dqp; 122 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
139 int error; 123 int error;
140
141 dqp = logitem->qli_dquot;
142 124
143 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 125 ASSERT(XFS_DQ_IS_LOCKED(dqp));
144 ASSERT(!completion_done(&dqp->q_flush)); 126 ASSERT(!completion_done(&dqp->q_flush));
@@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push(
160 xfs_dqunlock(dqp); 142 xfs_dqunlock(dqp);
161} 143}
162 144
163/*ARGSUSED*/
164STATIC xfs_lsn_t 145STATIC xfs_lsn_t
165xfs_qm_dquot_logitem_committed( 146xfs_qm_dquot_logitem_committed(
166 xfs_dq_logitem_t *l, 147 struct xfs_log_item *lip,
167 xfs_lsn_t lsn) 148 xfs_lsn_t lsn)
168{ 149{
169 /* 150 /*
170 * We always re-log the entire dquot when it becomes dirty, 151 * We always re-log the entire dquot when it becomes dirty,
171 * so, the latest copy _is_ the only one that matters. 152 * so, the latest copy _is_ the only one that matters.
172 */ 153 */
173 return (lsn); 154 return lsn;
174} 155}
175 156
176
177/* 157/*
178 * This is called to wait for the given dquot to be unpinned. 158 * This is called to wait for the given dquot to be unpinned.
179 * Most of these pin/unpin routines are plagiarized from inode code. 159 * Most of these pin/unpin routines are plagiarized from inode code.
180 */ 160 */
181void 161void
182xfs_qm_dqunpin_wait( 162xfs_qm_dqunpin_wait(
183 xfs_dquot_t *dqp) 163 struct xfs_dquot *dqp)
184{ 164{
185 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 165 ASSERT(XFS_DQ_IS_LOCKED(dqp));
186 if (atomic_read(&dqp->q_pincount) == 0) 166 if (atomic_read(&dqp->q_pincount) == 0)
@@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait(
206 */ 186 */
207STATIC void 187STATIC void
208xfs_qm_dquot_logitem_pushbuf( 188xfs_qm_dquot_logitem_pushbuf(
209 xfs_dq_logitem_t *qip) 189 struct xfs_log_item *lip)
210{ 190{
211 xfs_dquot_t *dqp; 191 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
212 xfs_mount_t *mp; 192 struct xfs_dquot *dqp = qlip->qli_dquot;
213 xfs_buf_t *bp; 193 struct xfs_buf *bp;
214 194
215 dqp = qip->qli_dquot;
216 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 195 ASSERT(XFS_DQ_IS_LOCKED(dqp));
217 196
218 /* 197 /*
@@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf(
220 * inode flush completed and the inode was taken off the AIL. 199 * inode flush completed and the inode was taken off the AIL.
221 * So, just get out. 200 * So, just get out.
222 */ 201 */
223 if (completion_done(&dqp->q_flush) || 202 if (completion_done(&dqp->q_flush) ||
224 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 203 !(lip->li_flags & XFS_LI_IN_AIL)) {
225 xfs_dqunlock(dqp); 204 xfs_dqunlock(dqp);
226 return; 205 return;
227 } 206 }
228 mp = dqp->q_mount; 207
229 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 208 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
230 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); 209 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
231 xfs_dqunlock(dqp); 210 xfs_dqunlock(dqp);
232 if (!bp) 211 if (!bp)
233 return; 212 return;
234 if (XFS_BUF_ISDELAYWRITE(bp)) 213 if (XFS_BUF_ISDELAYWRITE(bp))
235 xfs_buf_delwri_promote(bp); 214 xfs_buf_delwri_promote(bp);
236 xfs_buf_relse(bp); 215 xfs_buf_relse(bp);
237 return;
238
239} 216}
240 217
241/* 218/*
@@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf(
250 */ 227 */
251STATIC uint 228STATIC uint
252xfs_qm_dquot_logitem_trylock( 229xfs_qm_dquot_logitem_trylock(
253 xfs_dq_logitem_t *qip) 230 struct xfs_log_item *lip)
254{ 231{
255 xfs_dquot_t *dqp; 232 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
256 233
257 dqp = qip->qli_dquot;
258 if (atomic_read(&dqp->q_pincount) > 0) 234 if (atomic_read(&dqp->q_pincount) > 0)
259 return XFS_ITEM_PINNED; 235 return XFS_ITEM_PINNED;
260 236
261 if (! xfs_qm_dqlock_nowait(dqp)) 237 if (!xfs_qm_dqlock_nowait(dqp))
262 return XFS_ITEM_LOCKED; 238 return XFS_ITEM_LOCKED;
263 239
264 if (!xfs_dqflock_nowait(dqp)) { 240 if (!xfs_dqflock_nowait(dqp)) {
@@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock(
269 return XFS_ITEM_PUSHBUF; 245 return XFS_ITEM_PUSHBUF;
270 } 246 }
271 247
272 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 248 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
273 return XFS_ITEM_SUCCESS; 249 return XFS_ITEM_SUCCESS;
274} 250}
275 251
276
277/* 252/*
278 * Unlock the dquot associated with the log item. 253 * Unlock the dquot associated with the log item.
279 * Clear the fields of the dquot and dquot log item that 254 * Clear the fields of the dquot and dquot log item that
@@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock(
282 */ 257 */
283STATIC void 258STATIC void
284xfs_qm_dquot_logitem_unlock( 259xfs_qm_dquot_logitem_unlock(
285 xfs_dq_logitem_t *ql) 260 struct xfs_log_item *lip)
286{ 261{
287 xfs_dquot_t *dqp; 262 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
288 263
289 ASSERT(ql != NULL);
290 dqp = ql->qli_dquot;
291 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 264 ASSERT(XFS_DQ_IS_LOCKED(dqp));
292 265
293 /* 266 /*
@@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock(
304 xfs_dqunlock(dqp); 277 xfs_dqunlock(dqp);
305} 278}
306 279
307
308/* 280/*
309 * this needs to stamp an lsn into the dquot, I think. 281 * this needs to stamp an lsn into the dquot, I think.
310 * rpc's that look at user dquot's would then have to 282 * rpc's that look at user dquot's would then have to
311 * push on the dependency recorded in the dquot 283 * push on the dependency recorded in the dquot
312 */ 284 */
313/* ARGSUSED */
314STATIC void 285STATIC void
315xfs_qm_dquot_logitem_committing( 286xfs_qm_dquot_logitem_committing(
316 xfs_dq_logitem_t *l, 287 struct xfs_log_item *lip,
317 xfs_lsn_t lsn) 288 xfs_lsn_t lsn)
318{ 289{
319 return;
320} 290}
321 291
322
323/* 292/*
324 * This is the ops vector for dquots 293 * This is the ops vector for dquots
325 */ 294 */
326static struct xfs_item_ops xfs_dquot_item_ops = { 295static struct xfs_item_ops xfs_dquot_item_ops = {
327 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, 296 .iop_size = xfs_qm_dquot_logitem_size,
328 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 297 .iop_format = xfs_qm_dquot_logitem_format,
329 xfs_qm_dquot_logitem_format, 298 .iop_pin = xfs_qm_dquot_logitem_pin,
330 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, 299 .iop_unpin = xfs_qm_dquot_logitem_unpin,
331 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, 300 .iop_trylock = xfs_qm_dquot_logitem_trylock,
332 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 301 .iop_unlock = xfs_qm_dquot_logitem_unlock,
333 xfs_qm_dquot_logitem_unpin_remove, 302 .iop_committed = xfs_qm_dquot_logitem_committed,
334 .iop_trylock = (uint(*)(xfs_log_item_t*)) 303 .iop_push = xfs_qm_dquot_logitem_push,
335 xfs_qm_dquot_logitem_trylock, 304 .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
336 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, 305 .iop_committing = xfs_qm_dquot_logitem_committing
337 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
338 xfs_qm_dquot_logitem_committed,
339 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
340 .iop_pushbuf = (void(*)(xfs_log_item_t*))
341 xfs_qm_dquot_logitem_pushbuf,
342 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
343 xfs_qm_dquot_logitem_committing
344}; 306};
345 307
346/* 308/*
@@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
350 */ 312 */
351void 313void
352xfs_qm_dquot_logitem_init( 314xfs_qm_dquot_logitem_init(
353 struct xfs_dquot *dqp) 315 struct xfs_dquot *dqp)
354{ 316{
355 xfs_dq_logitem_t *lp; 317 struct xfs_dq_logitem *lp = &dqp->q_logitem;
356 lp = &dqp->q_logitem;
357 318
358 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, 319 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
359 &xfs_dquot_item_ops); 320 &xfs_dquot_item_ops);
@@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init(
374 335
375/*------------------ QUOTAOFF LOG ITEMS -------------------*/ 336/*------------------ QUOTAOFF LOG ITEMS -------------------*/
376 337
338static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
339{
340 return container_of(lip, struct xfs_qoff_logitem, qql_item);
341}
342
343
377/* 344/*
378 * This returns the number of iovecs needed to log the given quotaoff item. 345 * This returns the number of iovecs needed to log the given quotaoff item.
379 * We only need 1 iovec for an quotaoff item. It just logs the 346 * We only need 1 iovec for an quotaoff item. It just logs the
380 * quotaoff_log_format structure. 347 * quotaoff_log_format structure.
381 */ 348 */
382/*ARGSUSED*/
383STATIC uint 349STATIC uint
384xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) 350xfs_qm_qoff_logitem_size(
351 struct xfs_log_item *lip)
385{ 352{
386 return (1); 353 return 1;
387} 354}
388 355
389/* 356/*
@@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
394 * slots in the quotaoff item have been filled. 361 * slots in the quotaoff item have been filled.
395 */ 362 */
396STATIC void 363STATIC void
397xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, 364xfs_qm_qoff_logitem_format(
398 xfs_log_iovec_t *log_vector) 365 struct xfs_log_item *lip,
366 struct xfs_log_iovec *log_vector)
399{ 367{
400 ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); 368 struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
369
370 ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
401 371
402 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 372 log_vector->i_addr = &qflip->qql_format;
403 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 373 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
404 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; 374 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
405 qf->qql_format.qf_size = 1; 375 qflip->qql_format.qf_size = 1;
406} 376}
407 377
408
409/* 378/*
410 * Pinning has no meaning for an quotaoff item, so just return. 379 * Pinning has no meaning for an quotaoff item, so just return.
411 */ 380 */
412/*ARGSUSED*/
413STATIC void 381STATIC void
414xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) 382xfs_qm_qoff_logitem_pin(
383 struct xfs_log_item *lip)
415{ 384{
416 return;
417} 385}
418 386
419
420/* 387/*
421 * Since pinning has no meaning for an quotaoff item, unpinning does 388 * Since pinning has no meaning for an quotaoff item, unpinning does
422 * not either. 389 * not either.
423 */ 390 */
424/*ARGSUSED*/
425STATIC void 391STATIC void
426xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) 392xfs_qm_qoff_logitem_unpin(
393 struct xfs_log_item *lip,
394 int remove)
427{ 395{
428 return;
429}
430
431/*ARGSUSED*/
432STATIC void
433xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
434{
435 return;
436} 396}
437 397
438/* 398/*
439 * Quotaoff items have no locking, so just return success. 399 * Quotaoff items have no locking, so just return success.
440 */ 400 */
441/*ARGSUSED*/
442STATIC uint 401STATIC uint
443xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) 402xfs_qm_qoff_logitem_trylock(
403 struct xfs_log_item *lip)
444{ 404{
445 return XFS_ITEM_LOCKED; 405 return XFS_ITEM_LOCKED;
446} 406}
@@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
449 * Quotaoff items have no locking or pushing, so return failure 409 * Quotaoff items have no locking or pushing, so return failure
450 * so that the caller doesn't bother with us. 410 * so that the caller doesn't bother with us.
451 */ 411 */
452/*ARGSUSED*/
453STATIC void 412STATIC void
454xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) 413xfs_qm_qoff_logitem_unlock(
414 struct xfs_log_item *lip)
455{ 415{
456 return;
457} 416}
458 417
459/* 418/*
460 * The quotaoff-start-item is logged only once and cannot be moved in the log, 419 * The quotaoff-start-item is logged only once and cannot be moved in the log,
461 * so simply return the lsn at which it's been logged. 420 * so simply return the lsn at which it's been logged.
462 */ 421 */
463/*ARGSUSED*/
464STATIC xfs_lsn_t 422STATIC xfs_lsn_t
465xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) 423xfs_qm_qoff_logitem_committed(
424 struct xfs_log_item *lip,
425 xfs_lsn_t lsn)
466{ 426{
467 return (lsn); 427 return lsn;
468} 428}
469 429
470/* 430/*
471 * There isn't much you can do to push on an quotaoff item. It is simply 431 * There isn't much you can do to push on an quotaoff item. It is simply
472 * stuck waiting for the log to be flushed to disk. 432 * stuck waiting for the log to be flushed to disk.
473 */ 433 */
474/*ARGSUSED*/
475STATIC void 434STATIC void
476xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) 435xfs_qm_qoff_logitem_push(
436 struct xfs_log_item *lip)
477{ 437{
478 return;
479} 438}
480 439
481 440
482/*ARGSUSED*/
483STATIC xfs_lsn_t 441STATIC xfs_lsn_t
484xfs_qm_qoffend_logitem_committed( 442xfs_qm_qoffend_logitem_committed(
485 xfs_qoff_logitem_t *qfe, 443 struct xfs_log_item *lip,
486 xfs_lsn_t lsn) 444 xfs_lsn_t lsn)
487{ 445{
488 xfs_qoff_logitem_t *qfs; 446 struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
489 struct xfs_ail *ailp; 447 struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
448 struct xfs_ail *ailp = qfs->qql_item.li_ailp;
490 449
491 qfs = qfe->qql_start_lip;
492 ailp = qfs->qql_item.li_ailp;
493 spin_lock(&ailp->xa_lock);
494 /* 450 /*
495 * Delete the qoff-start logitem from the AIL. 451 * Delete the qoff-start logitem from the AIL.
496 * xfs_trans_ail_delete() drops the AIL lock. 452 * xfs_trans_ail_delete() drops the AIL lock.
497 */ 453 */
454 spin_lock(&ailp->xa_lock);
498 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); 455 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
456
499 kmem_free(qfs); 457 kmem_free(qfs);
500 kmem_free(qfe); 458 kmem_free(qfe);
501 return (xfs_lsn_t)-1; 459 return (xfs_lsn_t)-1;
@@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed(
515 * (truly makes the quotaoff irrevocable). If we do something else, 473 * (truly makes the quotaoff irrevocable). If we do something else,
516 * then maybe we don't need two. 474 * then maybe we don't need two.
517 */ 475 */
518/* ARGSUSED */
519STATIC void
520xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
521{
522 return;
523}
524
525/* ARGSUSED */
526STATIC void 476STATIC void
527xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) 477xfs_qm_qoff_logitem_committing(
478 struct xfs_log_item *lip,
479 xfs_lsn_t commit_lsn)
528{ 480{
529 return;
530} 481}
531 482
532static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 483static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
533 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 484 .iop_size = xfs_qm_qoff_logitem_size,
534 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 485 .iop_format = xfs_qm_qoff_logitem_format,
535 xfs_qm_qoff_logitem_format, 486 .iop_pin = xfs_qm_qoff_logitem_pin,
536 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 487 .iop_unpin = xfs_qm_qoff_logitem_unpin,
537 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 488 .iop_trylock = xfs_qm_qoff_logitem_trylock,
538 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 489 .iop_unlock = xfs_qm_qoff_logitem_unlock,
539 xfs_qm_qoff_logitem_unpin_remove, 490 .iop_committed = xfs_qm_qoffend_logitem_committed,
540 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 491 .iop_push = xfs_qm_qoff_logitem_push,
541 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 492 .iop_committing = xfs_qm_qoff_logitem_committing
542 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
543 xfs_qm_qoffend_logitem_committed,
544 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
545 .iop_pushbuf = NULL,
546 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
547 xfs_qm_qoffend_logitem_committing
548}; 493};
549 494
550/* 495/*
551 * This is the ops vector shared by all quotaoff-start log items. 496 * This is the ops vector shared by all quotaoff-start log items.
552 */ 497 */
553static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 498static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
554 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 499 .iop_size = xfs_qm_qoff_logitem_size,
555 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 500 .iop_format = xfs_qm_qoff_logitem_format,
556 xfs_qm_qoff_logitem_format, 501 .iop_pin = xfs_qm_qoff_logitem_pin,
557 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 502 .iop_unpin = xfs_qm_qoff_logitem_unpin,
558 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, 503 .iop_trylock = xfs_qm_qoff_logitem_trylock,
559 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 504 .iop_unlock = xfs_qm_qoff_logitem_unlock,
560 xfs_qm_qoff_logitem_unpin_remove, 505 .iop_committed = xfs_qm_qoff_logitem_committed,
561 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 506 .iop_push = xfs_qm_qoff_logitem_push,
562 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, 507 .iop_committing = xfs_qm_qoff_logitem_committing
563 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
564 xfs_qm_qoff_logitem_committed,
565 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
566 .iop_pushbuf = NULL,
567 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
568 xfs_qm_qoff_logitem_committing
569}; 508};
570 509
571/* 510/*
572 * Allocate and initialize an quotaoff item of the correct quota type(s). 511 * Allocate and initialize an quotaoff item of the correct quota type(s).
573 */ 512 */
574xfs_qoff_logitem_t * 513struct xfs_qoff_logitem *
575xfs_qm_qoff_logitem_init( 514xfs_qm_qoff_logitem_init(
576 struct xfs_mount *mp, 515 struct xfs_mount *mp,
577 xfs_qoff_logitem_t *start, 516 struct xfs_qoff_logitem *start,
578 uint flags) 517 uint flags)
579{ 518{
580 xfs_qoff_logitem_t *qf; 519 struct xfs_qoff_logitem *qf;
581 520
582 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); 521 qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
583 522
584 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? 523 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
585 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); 524 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
@@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init(
587 qf->qql_format.qf_type = XFS_LI_QUOTAOFF; 526 qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
588 qf->qql_format.qf_flags = flags; 527 qf->qql_format.qf_flags = flags;
589 qf->qql_start_lip = start; 528 qf->qql_start_lip = start;
590 return (qf); 529 return qf;
591} 530}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 67c018392d62..f8e854b4fde8 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -23,25 +23,18 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 31#include "xfs_dinode.h"
37#include "xfs_inode.h" 32#include "xfs_inode.h"
38#include "xfs_btree.h"
39#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
40#include "xfs_itable.h" 34#include "xfs_itable.h"
41#include "xfs_rtalloc.h" 35#include "xfs_rtalloc.h"
42#include "xfs_error.h" 36#include "xfs_error.h"
43#include "xfs_bmap.h" 37#include "xfs_bmap.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 38#include "xfs_attr.h"
46#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
47#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
@@ -62,8 +55,6 @@ uint ndquot;
62kmem_zone_t *qm_dqzone; 55kmem_zone_t *qm_dqzone;
63kmem_zone_t *qm_dqtrxzone; 56kmem_zone_t *qm_dqtrxzone;
64 57
65static cred_t xfs_zerocr;
66
67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 58STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 59STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 60
@@ -844,7 +835,7 @@ xfs_qm_dqattach_locked(
844 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 835 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
845 flags & XFS_QMOPT_DQALLOC, 836 flags & XFS_QMOPT_DQALLOC,
846 ip->i_udquot, &ip->i_gdquot) : 837 ip->i_udquot, &ip->i_gdquot) :
847 xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, 838 xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
848 flags & XFS_QMOPT_DQALLOC, 839 flags & XFS_QMOPT_DQALLOC,
849 ip->i_udquot, &ip->i_gdquot); 840 ip->i_udquot, &ip->i_gdquot);
850 /* 841 /*
@@ -1206,87 +1197,6 @@ xfs_qm_list_destroy(
1206 mutex_destroy(&(list->qh_lock)); 1197 mutex_destroy(&(list->qh_lock));
1207} 1198}
1208 1199
1209
1210/*
1211 * Stripped down version of dqattach. This doesn't attach, or even look at the
1212 * dquots attached to the inode. The rationale is that there won't be any
1213 * attached at the time this is called from quotacheck.
1214 */
1215STATIC int
1216xfs_qm_dqget_noattach(
1217 xfs_inode_t *ip,
1218 xfs_dquot_t **O_udqpp,
1219 xfs_dquot_t **O_gdqpp)
1220{
1221 int error;
1222 xfs_mount_t *mp;
1223 xfs_dquot_t *udqp, *gdqp;
1224
1225 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1226 mp = ip->i_mount;
1227 udqp = NULL;
1228 gdqp = NULL;
1229
1230 if (XFS_IS_UQUOTA_ON(mp)) {
1231 ASSERT(ip->i_udquot == NULL);
1232 /*
1233 * We want the dquot allocated if it doesn't exist.
1234 */
1235 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1236 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1237 &udqp))) {
1238 /*
1239 * Shouldn't be able to turn off quotas here.
1240 */
1241 ASSERT(error != ESRCH);
1242 ASSERT(error != ENOENT);
1243 return error;
1244 }
1245 ASSERT(udqp);
1246 }
1247
1248 if (XFS_IS_OQUOTA_ON(mp)) {
1249 ASSERT(ip->i_gdquot == NULL);
1250 if (udqp)
1251 xfs_dqunlock(udqp);
1252 error = XFS_IS_GQUOTA_ON(mp) ?
1253 xfs_qm_dqget(mp, ip,
1254 ip->i_d.di_gid, XFS_DQ_GROUP,
1255 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1256 &gdqp) :
1257 xfs_qm_dqget(mp, ip,
1258 ip->i_d.di_projid, XFS_DQ_PROJ,
1259 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1260 &gdqp);
1261 if (error) {
1262 if (udqp)
1263 xfs_qm_dqrele(udqp);
1264 ASSERT(error != ESRCH);
1265 ASSERT(error != ENOENT);
1266 return error;
1267 }
1268 ASSERT(gdqp);
1269
1270 /* Reacquire the locks in the right order */
1271 if (udqp) {
1272 if (! xfs_qm_dqlock_nowait(udqp)) {
1273 xfs_dqunlock(gdqp);
1274 xfs_dqlock(udqp);
1275 xfs_dqlock(gdqp);
1276 }
1277 }
1278 }
1279
1280 *O_udqpp = udqp;
1281 *O_gdqpp = gdqp;
1282
1283#ifdef QUOTADEBUG
1284 if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1285 if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1286#endif
1287 return 0;
1288}
1289
1290/* 1200/*
1291 * Create an inode and return with a reference already taken, but unlocked 1201 * Create an inode and return with a reference already taken, but unlocked
1292 * This is how we create quota inodes 1202 * This is how we create quota inodes
@@ -1312,8 +1222,8 @@ xfs_qm_qino_alloc(
1312 return error; 1222 return error;
1313 } 1223 }
1314 1224
1315 if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 1225 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
1316 &xfs_zerocr, 0, 1, ip, &committed))) { 1226 if (error) {
1317 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1227 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1318 XFS_TRANS_ABORT); 1228 XFS_TRANS_ABORT);
1319 return error; 1229 return error;
@@ -1497,7 +1407,7 @@ xfs_qm_dqiterate(
1497 maxlblkcnt - lblkno, 1407 maxlblkcnt - lblkno,
1498 XFS_BMAPI_METADATA, 1408 XFS_BMAPI_METADATA,
1499 NULL, 1409 NULL,
1500 0, map, &nmaps, NULL, NULL); 1410 0, map, &nmaps, NULL);
1501 xfs_iunlock(qip, XFS_ILOCK_SHARED); 1411 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1502 if (error) 1412 if (error)
1503 break; 1413 break;
@@ -1523,7 +1433,7 @@ xfs_qm_dqiterate(
1523 rablkcnt = map[i+1].br_blockcount; 1433 rablkcnt = map[i+1].br_blockcount;
1524 rablkno = map[i+1].br_startblock; 1434 rablkno = map[i+1].br_startblock;
1525 while (rablkcnt--) { 1435 while (rablkcnt--) {
1526 xfs_baread(mp->m_ddev_targp, 1436 xfs_buf_readahead(mp->m_ddev_targp,
1527 XFS_FSB_TO_DADDR(mp, rablkno), 1437 XFS_FSB_TO_DADDR(mp, rablkno),
1528 mp->m_quotainfo->qi_dqchunklen); 1438 mp->m_quotainfo->qi_dqchunklen);
1529 rablkno++; 1439 rablkno++;
@@ -1553,18 +1463,34 @@ xfs_qm_dqiterate(
1553 1463
1554/* 1464/*
1555 * Called by dqusage_adjust in doing a quotacheck. 1465 * Called by dqusage_adjust in doing a quotacheck.
1556 * Given the inode, and a dquot (either USR or GRP, doesn't matter), 1466 *
1557 * this updates its incore copy as well as the buffer copy. This is 1467 * Given the inode, and a dquot id this updates both the incore dqout as well
1558 * so that once the quotacheck is done, we can just log all the buffers, 1468 * as the buffer copy. This is so that once the quotacheck is done, we can
1559 * as opposed to logging numerous updates to individual dquots. 1469 * just log all the buffers, as opposed to logging numerous updates to
1470 * individual dquots.
1560 */ 1471 */
1561STATIC void 1472STATIC int
1562xfs_qm_quotacheck_dqadjust( 1473xfs_qm_quotacheck_dqadjust(
1563 xfs_dquot_t *dqp, 1474 struct xfs_inode *ip,
1475 xfs_dqid_t id,
1476 uint type,
1564 xfs_qcnt_t nblks, 1477 xfs_qcnt_t nblks,
1565 xfs_qcnt_t rtblks) 1478 xfs_qcnt_t rtblks)
1566{ 1479{
1567 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1480 struct xfs_mount *mp = ip->i_mount;
1481 struct xfs_dquot *dqp;
1482 int error;
1483
1484 error = xfs_qm_dqget(mp, ip, id, type,
1485 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1486 if (error) {
1487 /*
1488 * Shouldn't be able to turn off quotas here.
1489 */
1490 ASSERT(error != ESRCH);
1491 ASSERT(error != ENOENT);
1492 return error;
1493 }
1568 1494
1569 trace_xfs_dqadjust(dqp); 1495 trace_xfs_dqadjust(dqp);
1570 1496
@@ -1589,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust(
1589 * There are no timers for the default values set in the root dquot. 1515 * There are no timers for the default values set in the root dquot.
1590 */ 1516 */
1591 if (dqp->q_core.d_id) { 1517 if (dqp->q_core.d_id) {
1592 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); 1518 xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
1593 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); 1519 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1594 } 1520 }
1595 1521
1596 dqp->dq_flags |= XFS_DQ_DIRTY; 1522 dqp->dq_flags |= XFS_DQ_DIRTY;
1523 xfs_qm_dqput(dqp);
1524 return 0;
1597} 1525}
1598 1526
1599STATIC int 1527STATIC int
@@ -1636,8 +1564,7 @@ xfs_qm_dqusage_adjust(
1636 int *res) /* result code value */ 1564 int *res) /* result code value */
1637{ 1565{
1638 xfs_inode_t *ip; 1566 xfs_inode_t *ip;
1639 xfs_dquot_t *udqp, *gdqp; 1567 xfs_qcnt_t nblks, rtblks = 0;
1640 xfs_qcnt_t nblks, rtblks;
1641 int error; 1568 int error;
1642 1569
1643 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1570 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1657,49 +1584,24 @@ xfs_qm_dqusage_adjust(
1657 * the case in all other instances. It's OK that we do this because 1584 * the case in all other instances. It's OK that we do this because
1658 * quotacheck is done only at mount time. 1585 * quotacheck is done only at mount time.
1659 */ 1586 */
1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { 1587 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1588 if (error) {
1661 *res = BULKSTAT_RV_NOTHING; 1589 *res = BULKSTAT_RV_NOTHING;
1662 return error; 1590 return error;
1663 } 1591 }
1664 1592
1665 /* 1593 ASSERT(ip->i_delayed_blks == 0);
1666 * Obtain the locked dquots. In case of an error (eg. allocation
1667 * fails for ENOSPC), we return the negative of the error number
1668 * to bulkstat, so that it can get propagated to quotacheck() and
1669 * making us disable quotas for the file system.
1670 */
1671 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1672 xfs_iput(ip, XFS_ILOCK_EXCL);
1673 *res = BULKSTAT_RV_GIVEUP;
1674 return error;
1675 }
1676 1594
1677 rtblks = 0; 1595 if (XFS_IS_REALTIME_INODE(ip)) {
1678 if (! XFS_IS_REALTIME_INODE(ip)) {
1679 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1680 } else {
1681 /* 1596 /*
1682 * Walk thru the extent list and count the realtime blocks. 1597 * Walk thru the extent list and count the realtime blocks.
1683 */ 1598 */
1684 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { 1599 error = xfs_qm_get_rtblks(ip, &rtblks);
1685 xfs_iput(ip, XFS_ILOCK_EXCL); 1600 if (error)
1686 if (udqp) 1601 goto error0;
1687 xfs_qm_dqput(udqp);
1688 if (gdqp)
1689 xfs_qm_dqput(gdqp);
1690 *res = BULKSTAT_RV_GIVEUP;
1691 return error;
1692 }
1693 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1694 } 1602 }
1695 ASSERT(ip->i_delayed_blks == 0);
1696 1603
1697 /* 1604 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1698 * We can't release the inode while holding its dquot locks.
1699 * The inode can go into inactive and might try to acquire the dquotlocks.
1700 * So, just unlock here and do a vn_rele at the end.
1701 */
1702 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1703 1605
1704 /* 1606 /*
1705 * Add the (disk blocks and inode) resources occupied by this 1607 * Add the (disk blocks and inode) resources occupied by this
@@ -1714,26 +1616,36 @@ xfs_qm_dqusage_adjust(
1714 * and quotaoffs don't race. (Quotachecks happen at mount time only). 1616 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1715 */ 1617 */
1716 if (XFS_IS_UQUOTA_ON(mp)) { 1618 if (XFS_IS_UQUOTA_ON(mp)) {
1717 ASSERT(udqp); 1619 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1718 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); 1620 XFS_DQ_USER, nblks, rtblks);
1719 xfs_qm_dqput(udqp); 1621 if (error)
1622 goto error0;
1720 } 1623 }
1721 if (XFS_IS_OQUOTA_ON(mp)) { 1624
1722 ASSERT(gdqp); 1625 if (XFS_IS_GQUOTA_ON(mp)) {
1723 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); 1626 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1724 xfs_qm_dqput(gdqp); 1627 XFS_DQ_GROUP, nblks, rtblks);
1628 if (error)
1629 goto error0;
1725 } 1630 }
1726 /*
1727 * Now release the inode. This will send it to 'inactive', and
1728 * possibly even free blocks.
1729 */
1730 IRELE(ip);
1731 1631
1732 /* 1632 if (XFS_IS_PQUOTA_ON(mp)) {
1733 * Goto next inode. 1633 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1734 */ 1634 XFS_DQ_PROJ, nblks, rtblks);
1635 if (error)
1636 goto error0;
1637 }
1638
1639 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1640 IRELE(ip);
1735 *res = BULKSTAT_RV_DIDONE; 1641 *res = BULKSTAT_RV_DIDONE;
1736 return 0; 1642 return 0;
1643
1644error0:
1645 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1646 IRELE(ip);
1647 *res = BULKSTAT_RV_GIVEUP;
1648 return error;
1737} 1649}
1738 1650
1739/* 1651/*
@@ -2229,7 +2141,7 @@ xfs_qm_write_sb_changes(
2229 2141
2230 2142
2231/* 2143/*
2232 * Given an inode, a uid and gid (from cred_t) make sure that we have 2144 * Given an inode, a uid, gid and prid make sure that we have
2233 * allocated relevant dquot(s) on disk, and that we won't exceed inode 2145 * allocated relevant dquot(s) on disk, and that we won't exceed inode
2234 * quotas by creating this file. 2146 * quotas by creating this file.
2235 * This also attaches dquot(s) to the given inode after locking it, 2147 * This also attaches dquot(s) to the given inode after locking it,
@@ -2337,7 +2249,7 @@ xfs_qm_vop_dqalloc(
2337 xfs_dqunlock(gq); 2249 xfs_dqunlock(gq);
2338 } 2250 }
2339 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 2251 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2340 if (ip->i_d.di_projid != prid) { 2252 if (xfs_get_projid(ip) != prid) {
2341 xfs_iunlock(ip, lockflags); 2253 xfs_iunlock(ip, lockflags);
2342 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 2254 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2343 XFS_DQ_PROJ, 2255 XFS_DQ_PROJ,
@@ -2459,7 +2371,7 @@ xfs_qm_vop_chown_reserve(
2459 } 2371 }
2460 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { 2372 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2461 if (XFS_IS_PQUOTA_ON(ip->i_mount) && 2373 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2462 ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) 2374 xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
2463 prjflags = XFS_QMOPT_ENOSPC; 2375 prjflags = XFS_QMOPT_ENOSPC;
2464 2376
2465 if (prjflags || 2377 if (prjflags ||
@@ -2563,7 +2475,7 @@ xfs_qm_vop_create_dqattach(
2563 ip->i_gdquot = gdqp; 2475 ip->i_gdquot = gdqp;
2564 ASSERT(XFS_IS_OQUOTA_ON(mp)); 2476 ASSERT(XFS_IS_OQUOTA_ON(mp));
2565 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 2477 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2566 ip->i_d.di_gid : ip->i_d.di_projid) == 2478 ip->i_d.di_gid : xfs_get_projid(ip)) ==
2567 be32_to_cpu(gdqp->q_core.d_id)); 2479 be32_to_cpu(gdqp->q_core.d_id));
2568 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2480 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2569 } 2481 }
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97b410c12794..45b5cb1788ab 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
@@ -91,7 +81,7 @@ xfs_qm_statvfs(
91 xfs_mount_t *mp = ip->i_mount; 81 xfs_mount_t *mp = ip->i_mount;
92 xfs_dquot_t *dqp; 82 xfs_dquot_t *dqp;
93 83
94 if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { 84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
95 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); 85 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
96 xfs_qm_dqput(dqp); 86 xfs_qm_dqput(dqp);
97 } 87 }
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 3d1fc79532e2..8671a0b32644 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_bmap.h" 32#include "xfs_bmap.h"
41#include "xfs_btree.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_qm.h" 37#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index b4487764e923..bdebc183223e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,25 +26,15 @@
26#include "xfs_trans.h" 26#include "xfs_trans.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_alloc.h" 29#include "xfs_alloc.h"
31#include "xfs_dmapi.h"
32#include "xfs_quota.h" 30#include "xfs_quota.h"
33#include "xfs_mount.h" 31#include "xfs_mount.h"
34#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
35#include "xfs_alloc_btree.h"
36#include "xfs_ialloc_btree.h"
37#include "xfs_dir2_sf.h"
38#include "xfs_attr_sf.h"
39#include "xfs_dinode.h"
40#include "xfs_inode.h" 33#include "xfs_inode.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 34#include "xfs_itable.h"
43#include "xfs_bmap.h" 35#include "xfs_bmap.h"
44#include "xfs_btree.h"
45#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
46#include "xfs_error.h" 37#include "xfs_error.h"
47#include "xfs_rw.h"
48#include "xfs_attr.h" 38#include "xfs_attr.h"
49#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
50#include "xfs_utils.h" 40#include "xfs_utils.h"
@@ -248,40 +238,74 @@ out_unlock:
248 return error; 238 return error;
249} 239}
250 240
241STATIC int
242xfs_qm_scall_trunc_qfile(
243 struct xfs_mount *mp,
244 xfs_ino_t ino)
245{
246 struct xfs_inode *ip;
247 struct xfs_trans *tp;
248 int error;
249
250 if (ino == NULLFSINO)
251 return 0;
252
253 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
254 if (error)
255 return error;
256
257 xfs_ilock(ip, XFS_IOLOCK_EXCL);
258
259 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
260 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
261 XFS_TRANS_PERM_LOG_RES,
262 XFS_ITRUNCATE_LOG_COUNT);
263 if (error) {
264 xfs_trans_cancel(tp, 0);
265 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
266 goto out_put;
267 }
268
269 xfs_ilock(ip, XFS_ILOCK_EXCL);
270 xfs_trans_ijoin(tp, ip);
271
272 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
273 if (error) {
274 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
275 XFS_TRANS_ABORT);
276 goto out_unlock;
277 }
278
279 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
280 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
281
282out_unlock:
283 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
284out_put:
285 IRELE(ip);
286 return error;
287}
288
251int 289int
252xfs_qm_scall_trunc_qfiles( 290xfs_qm_scall_trunc_qfiles(
253 xfs_mount_t *mp, 291 xfs_mount_t *mp,
254 uint flags) 292 uint flags)
255{ 293{
256 int error = 0, error2 = 0; 294 int error = 0, error2 = 0;
257 xfs_inode_t *qip;
258 295
259 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
260 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
261 return XFS_ERROR(EINVAL); 298 return XFS_ERROR(EINVAL);
262 } 299 }
263 300
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 301 if (flags & XFS_DQ_USER)
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); 302 error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
266 if (!error) { 303 if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
267 error = xfs_truncate_file(mp, qip); 304 error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
268 IRELE(qip);
269 }
270 }
271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip);
278 }
279 }
280 305
281 return error ? error : error2; 306 return error ? error : error2;
282} 307}
283 308
284
285/* 309/*
286 * Switch on (a given) quota enforcement for a filesystem. This takes 310 * Switch on (a given) quota enforcement for a filesystem. This takes
287 * effect immediately. 311 * effect immediately.
@@ -786,9 +810,9 @@ xfs_qm_export_dquot(
786 } 810 }
787 811
788#ifdef DEBUG 812#ifdef DEBUG
789 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || 813 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
790 (XFS_IS_OQUOTA_ENFORCED(mp) && 814 (XFS_IS_OQUOTA_ENFORCED(mp) &&
791 (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && 815 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
792 dst->d_id != 0) { 816 dst->d_id != 0) {
793 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 817 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
794 (dst->d_blk_softlimit > 0)) { 818 (dst->d_blk_softlimit > 0)) {
@@ -809,17 +833,17 @@ xfs_qm_export_qtype_flags(
809 /* 833 /*
810 * Can't be more than one, or none. 834 * Can't be more than one, or none.
811 */ 835 */
812 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != 836 ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
813 (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); 837 (FS_PROJ_QUOTA | FS_USER_QUOTA));
814 ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != 838 ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
815 (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); 839 (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
816 ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != 840 ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
817 (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); 841 (FS_USER_QUOTA | FS_GROUP_QUOTA));
818 ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); 842 ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
819 843
820 return (flags & XFS_DQ_USER) ? 844 return (flags & XFS_DQ_USER) ?
821 XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? 845 FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
822 XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; 846 FS_PROJ_QUOTA : FS_GROUP_QUOTA;
823} 847}
824 848
825STATIC uint 849STATIC uint
@@ -830,16 +854,16 @@ xfs_qm_export_flags(
830 854
831 uflags = 0; 855 uflags = 0;
832 if (flags & XFS_UQUOTA_ACCT) 856 if (flags & XFS_UQUOTA_ACCT)
833 uflags |= XFS_QUOTA_UDQ_ACCT; 857 uflags |= FS_QUOTA_UDQ_ACCT;
834 if (flags & XFS_PQUOTA_ACCT) 858 if (flags & XFS_PQUOTA_ACCT)
835 uflags |= XFS_QUOTA_PDQ_ACCT; 859 uflags |= FS_QUOTA_PDQ_ACCT;
836 if (flags & XFS_GQUOTA_ACCT) 860 if (flags & XFS_GQUOTA_ACCT)
837 uflags |= XFS_QUOTA_GDQ_ACCT; 861 uflags |= FS_QUOTA_GDQ_ACCT;
838 if (flags & XFS_UQUOTA_ENFD) 862 if (flags & XFS_UQUOTA_ENFD)
839 uflags |= XFS_QUOTA_UDQ_ENFD; 863 uflags |= FS_QUOTA_UDQ_ENFD;
840 if (flags & (XFS_OQUOTA_ENFD)) { 864 if (flags & (XFS_OQUOTA_ENFD)) {
841 uflags |= (flags & XFS_GQUOTA_ACCT) ? 865 uflags |= (flags & XFS_GQUOTA_ACCT) ?
842 XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; 866 FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
843 } 867 }
844 return (uflags); 868 return (uflags);
845} 869}
@@ -851,21 +875,14 @@ xfs_dqrele_inode(
851 struct xfs_perag *pag, 875 struct xfs_perag *pag,
852 int flags) 876 int flags)
853{ 877{
854 int error;
855
856 /* skip quota inodes */ 878 /* skip quota inodes */
857 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || 879 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
858 ip == ip->i_mount->m_quotainfo->qi_gquotaip) { 880 ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
859 ASSERT(ip->i_udquot == NULL); 881 ASSERT(ip->i_udquot == NULL);
860 ASSERT(ip->i_gdquot == NULL); 882 ASSERT(ip->i_gdquot == NULL);
861 read_unlock(&pag->pag_ici_lock);
862 return 0; 883 return 0;
863 } 884 }
864 885
865 error = xfs_sync_inode_valid(ip, pag);
866 if (error)
867 return error;
868
869 xfs_ilock(ip, XFS_ILOCK_EXCL); 886 xfs_ilock(ip, XFS_ILOCK_EXCL);
870 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 887 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
871 xfs_qm_dqrele(ip->i_udquot); 888 xfs_qm_dqrele(ip->i_udquot);
@@ -875,8 +892,7 @@ xfs_dqrele_inode(
875 xfs_qm_dqrele(ip->i_gdquot); 892 xfs_qm_dqrele(ip->i_gdquot);
876 ip->i_gdquot = NULL; 893 ip->i_gdquot = NULL;
877 } 894 }
878 xfs_iput(ip, XFS_ILOCK_EXCL); 895 xfs_iunlock(ip, XFS_ILOCK_EXCL);
879
880 return 0; 896 return 0;
881} 897}
882 898
@@ -893,8 +909,7 @@ xfs_qm_dqrele_all_inodes(
893 uint flags) 909 uint flags)
894{ 910{
895 ASSERT(mp->m_quotainfo); 911 ASSERT(mp->m_quotainfo);
896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, 912 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
897 XFS_ICI_NO_TAG, 0, NULL);
898} 913}
899 914
900/*------------------------------------------------------------------------*/ 915/*------------------------------------------------------------------------*/
@@ -1143,13 +1158,14 @@ xfs_qm_internalqcheck_adjust(
1143 * of those now. 1158 * of those now.
1144 */ 1159 */
1145 if (! ipreleased) { 1160 if (! ipreleased) {
1146 xfs_iput(ip, lock_flags); 1161 xfs_iunlock(ip, lock_flags);
1162 IRELE(ip);
1147 ipreleased = B_TRUE; 1163 ipreleased = B_TRUE;
1148 goto again; 1164 goto again;
1149 } 1165 }
1150 xfs_qm_internalqcheck_get_dquots(mp, 1166 xfs_qm_internalqcheck_get_dquots(mp,
1151 (xfs_dqid_t) ip->i_d.di_uid, 1167 (xfs_dqid_t) ip->i_d.di_uid,
1152 (xfs_dqid_t) ip->i_d.di_projid, 1168 (xfs_dqid_t) xfs_get_projid(ip),
1153 (xfs_dqid_t) ip->i_d.di_gid, 1169 (xfs_dqid_t) ip->i_d.di_gid,
1154 &ud, &gd); 1170 &ud, &gd);
1155 if (XFS_IS_UQUOTA_ON(mp)) { 1171 if (XFS_IS_UQUOTA_ON(mp)) {
@@ -1160,7 +1176,8 @@ xfs_qm_internalqcheck_adjust(
1160 ASSERT(gd); 1176 ASSERT(gd);
1161 xfs_qm_internalqcheck_dqadjust(ip, gd); 1177 xfs_qm_internalqcheck_dqadjust(ip, gd);
1162 } 1178 }
1163 xfs_iput(ip, lock_flags); 1179 xfs_iunlock(ip, lock_flags);
1180 IRELE(ip);
1164 *res = BULKSTAT_RV_DIDONE; 1181 *res = BULKSTAT_RV_DIDONE;
1165 return (0); 1182 return (0);
1166} 1183}
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 061d827da33c..7de91d1b75c0 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,25 +23,15 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h" 30#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_itable.h" 31#include "xfs_itable.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 32#include "xfs_bmap.h"
42#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
43#include "xfs_error.h" 34#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h" 35#include "xfs_attr.h"
46#include "xfs_buf_item.h" 36#include "xfs_buf_item.h"
47#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
@@ -59,16 +49,14 @@ xfs_trans_dqjoin(
59 xfs_trans_t *tp, 49 xfs_trans_t *tp,
60 xfs_dquot_t *dqp) 50 xfs_dquot_t *dqp)
61{ 51{
62 xfs_dq_logitem_t *lp = &dqp->q_logitem;
63
64 ASSERT(dqp->q_transp != tp); 52 ASSERT(dqp->q_transp != tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 53 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 ASSERT(lp->qli_dquot == dqp); 54 ASSERT(dqp->q_logitem.qli_dquot == dqp);
67 55
68 /* 56 /*
69 * Get a log_item_desc to point at the new item. 57 * Get a log_item_desc to point at the new item.
70 */ 58 */
71 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); 59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
72 60
73 /* 61 /*
74 * Initialize i_transp so we can later determine if this dquot is 62 * Initialize i_transp so we can later determine if this dquot is
@@ -93,16 +81,11 @@ xfs_trans_log_dquot(
93 xfs_trans_t *tp, 81 xfs_trans_t *tp,
94 xfs_dquot_t *dqp) 82 xfs_dquot_t *dqp)
95{ 83{
96 xfs_log_item_desc_t *lidp;
97
98 ASSERT(dqp->q_transp == tp); 84 ASSERT(dqp->q_transp == tp);
99 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 85 ASSERT(XFS_DQ_IS_LOCKED(dqp));
100 86
101 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
102 ASSERT(lidp != NULL);
103
104 tp->t_flags |= XFS_TRANS_DIRTY; 87 tp->t_flags |= XFS_TRANS_DIRTY;
105 lidp->lid_flags |= XFS_LID_DIRTY; 88 dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
106} 89}
107 90
108/* 91/*
@@ -874,9 +857,8 @@ xfs_trans_get_qoff_item(
874 /* 857 /*
875 * Get a log_item_desc to point at the new item. 858 * Get a log_item_desc to point at the new item.
876 */ 859 */
877 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); 860 xfs_trans_add_item(tp, &q->qql_item);
878 861 return q;
879 return (q);
880} 862}
881 863
882 864
@@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item(
890 xfs_trans_t *tp, 872 xfs_trans_t *tp,
891 xfs_qoff_logitem_t *qlp) 873 xfs_qoff_logitem_t *qlp)
892{ 874{
893 xfs_log_item_desc_t *lidp;
894
895 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
896 ASSERT(lidp != NULL);
897
898 tp->t_flags |= XFS_TRANS_DIRTY; 875 tp->t_flags |= XFS_TRANS_DIRTY;
899 lidp->lid_flags |= XFS_LID_DIRTY; 876 qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
900} 877}
901 878
902STATIC void 879STATIC void
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 3f3610a7ee05..975aa10e1a47 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -22,7 +22,6 @@
22#include "xfs_sb.h" 22#include "xfs_sb.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
26#include "xfs_mount.h" 25#include "xfs_mount.h"
27#include "xfs_error.h" 26#include "xfs_error.h"
28 27
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4917d4eed4ed..63c7a1a6c022 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,15 @@ typedef struct xfs_perag {
230 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
233 struct mutex pag_ici_reclaim_lock; /* serialisation point */
234 unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
235
236 /* buffer cache index */
237 spinlock_t pag_buf_lock; /* lock for pag_buf_tree */
238 struct rb_root pag_buf_tree; /* ordered tree of active buffers */
239
240 /* for rcu-safe freeing */
241 struct rcu_head rcu_head;
233#endif 242#endif
234 int pagb_count; /* pagb slots in use */ 243 int pagb_count; /* pagb slots in use */
235} xfs_perag_t; 244} xfs_perag_t;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a7fbe8a99b12..112abc439ca5 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,18 +24,13 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_error.h" 35#include "xfs_error.h"
41#include "xfs_trace.h" 36#include "xfs_trace.h"
@@ -680,7 +675,7 @@ xfs_alloc_ag_vextent_near(
680 xfs_agblock_t gtbnoa; /* aligned ... */ 675 xfs_agblock_t gtbnoa; /* aligned ... */
681 xfs_extlen_t gtdiff; /* difference to right side entry */ 676 xfs_extlen_t gtdiff; /* difference to right side entry */
682 xfs_extlen_t gtlen; /* length of right side entry */ 677 xfs_extlen_t gtlen; /* length of right side entry */
683 xfs_extlen_t gtlena; /* aligned ... */ 678 xfs_extlen_t gtlena = 0; /* aligned ... */
684 xfs_agblock_t gtnew; /* useful start bno of right side */ 679 xfs_agblock_t gtnew; /* useful start bno of right side */
685 int error; /* error code */ 680 int error; /* error code */
686 int i; /* result code, temporary */ 681 int i; /* result code, temporary */
@@ -688,10 +683,8 @@ xfs_alloc_ag_vextent_near(
688 xfs_agblock_t ltbno; /* start bno of left side entry */ 683 xfs_agblock_t ltbno; /* start bno of left side entry */
689 xfs_agblock_t ltbnoa; /* aligned ... */ 684 xfs_agblock_t ltbnoa; /* aligned ... */
690 xfs_extlen_t ltdiff; /* difference to left side entry */ 685 xfs_extlen_t ltdiff; /* difference to left side entry */
691 /*REFERENCED*/
692 xfs_agblock_t ltend; /* end bno of left side entry */
693 xfs_extlen_t ltlen; /* length of left side entry */ 686 xfs_extlen_t ltlen; /* length of left side entry */
694 xfs_extlen_t ltlena; /* aligned ... */ 687 xfs_extlen_t ltlena = 0; /* aligned ... */
695 xfs_agblock_t ltnew; /* useful start bno of left side */ 688 xfs_agblock_t ltnew; /* useful start bno of left side */
696 xfs_extlen_t rlen; /* length of returned extent */ 689 xfs_extlen_t rlen; /* length of returned extent */
697#if defined(DEBUG) && defined(__KERNEL__) 690#if defined(DEBUG) && defined(__KERNEL__)
@@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near(
814 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 807 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
815 goto error0; 808 goto error0;
816 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 809 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
817 ltend = ltbno + ltlen; 810 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
818 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
819 args->len = blen; 811 args->len = blen;
820 if (!xfs_alloc_fix_minleft(args)) { 812 if (!xfs_alloc_fix_minleft(args)) {
821 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 813 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near(
828 */ 820 */
829 args->agbno = bnew; 821 args->agbno = bnew;
830 ASSERT(bnew >= ltbno); 822 ASSERT(bnew >= ltbno);
831 ASSERT(bnew + blen <= ltend); 823 ASSERT(bnew + blen <= ltbno + ltlen);
832 /* 824 /*
833 * Set up a cursor for the by-bno tree. 825 * Set up a cursor for the by-bno tree.
834 */ 826 */
@@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near(
1157 /* 1149 /*
1158 * Fix up the length and compute the useful address. 1150 * Fix up the length and compute the useful address.
1159 */ 1151 */
1160 ltend = ltbno + ltlen;
1161 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1152 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1162 xfs_alloc_fix_len(args); 1153 xfs_alloc_fix_len(args);
1163 if (!xfs_alloc_fix_minleft(args)) { 1154 if (!xfs_alloc_fix_minleft(args)) {
@@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near(
1170 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1161 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1171 ltlen, &ltnew); 1162 ltlen, &ltnew);
1172 ASSERT(ltnew >= ltbno); 1163 ASSERT(ltnew >= ltbno);
1173 ASSERT(ltnew + rlen <= ltend); 1164 ASSERT(ltnew + rlen <= ltbno + ltlen);
1174 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1165 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1175 args->agbno = ltnew; 1166 args->agbno = ltnew;
1176 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1167 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 6d05199b667c..895009a97271 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -27,16 +27,16 @@ struct xfs_busy_extent;
27/* 27/*
28 * Freespace allocation types. Argument to xfs_alloc_[v]extent. 28 * Freespace allocation types. Argument to xfs_alloc_[v]extent.
29 */ 29 */
30typedef enum xfs_alloctype 30#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
31{ 31#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
32 XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ 32#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
33 XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ 33#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
34 XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ 34#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
35 XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ 35#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
36 XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ 36#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
37 XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ 37
38 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ 38/* this should become an enum again when the tracing code is fixed */
39} xfs_alloctype_t; 39typedef unsigned int xfs_alloctype_t;
40 40
41#define XFS_ALLOC_TYPES \ 41#define XFS_ALLOC_TYPES \
42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ 42 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 83f494218759..3916925e2584 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,19 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_btree_trace.h" 34#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h"
40#include "xfs_alloc.h" 35#include "xfs_alloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
@@ -285,38 +280,6 @@ xfs_allocbt_key_diff(
285 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; 280 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
286} 281}
287 282
288STATIC int
289xfs_allocbt_kill_root(
290 struct xfs_btree_cur *cur,
291 struct xfs_buf *bp,
292 int level,
293 union xfs_btree_ptr *newroot)
294{
295 int error;
296
297 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
298 XFS_BTREE_STATS_INC(cur, killroot);
299
300 /*
301 * Update the root pointer, decreasing the level by 1 and then
302 * free the old root.
303 */
304 xfs_allocbt_set_root(cur, newroot, -1);
305 error = xfs_allocbt_free_block(cur, bp);
306 if (error) {
307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
308 return error;
309 }
310
311 XFS_BTREE_STATS_INC(cur, free);
312
313 xfs_btree_setbuf(cur, level, NULL);
314 cur->bc_nlevels--;
315
316 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
317 return 0;
318}
319
320#ifdef DEBUG 283#ifdef DEBUG
321STATIC int 284STATIC int
322xfs_allocbt_keys_inorder( 285xfs_allocbt_keys_inorder(
@@ -428,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
428 391
429 .dup_cursor = xfs_allocbt_dup_cursor, 392 .dup_cursor = xfs_allocbt_dup_cursor,
430 .set_root = xfs_allocbt_set_root, 393 .set_root = xfs_allocbt_set_root,
431 .kill_root = xfs_allocbt_kill_root,
432 .alloc_block = xfs_allocbt_alloc_block, 394 .alloc_block = xfs_allocbt_alloc_block,
433 .free_block = xfs_allocbt_free_block, 395 .free_block = xfs_allocbt_free_block,
434 .update_lastrec = xfs_allocbt_update_lastrec, 396 .update_lastrec = xfs_allocbt_update_lastrec,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b9c196a53c42..c86375378810 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -25,19 +25,13 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h" 31#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 32#include "xfs_dinode.h"
38#include "xfs_inode.h" 33#include "xfs_inode.h"
39#include "xfs_alloc.h" 34#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
42#include "xfs_bmap.h" 36#include "xfs_bmap.h"
43#include "xfs_attr.h" 37#include "xfs_attr.h"
@@ -325,8 +319,7 @@ xfs_attr_set_int(
325 return (error); 319 return (error);
326 } 320 }
327 321
328 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 322 xfs_trans_ijoin(args.trans, dp);
329 xfs_trans_ihold(args.trans, dp);
330 323
331 /* 324 /*
332 * If the attribute list is non-existent or a shortform list, 325 * If the attribute list is non-existent or a shortform list,
@@ -362,16 +355,15 @@ xfs_attr_set_int(
362 if (mp->m_flags & XFS_MOUNT_WSYNC) { 355 if (mp->m_flags & XFS_MOUNT_WSYNC) {
363 xfs_trans_set_sync(args.trans); 356 xfs_trans_set_sync(args.trans);
364 } 357 }
358
359 if (!error && (flags & ATTR_KERNOTIME) == 0) {
360 xfs_trans_ichgtime(args.trans, dp,
361 XFS_ICHGTIME_CHG);
362 }
365 err2 = xfs_trans_commit(args.trans, 363 err2 = xfs_trans_commit(args.trans,
366 XFS_TRANS_RELEASE_LOG_RES); 364 XFS_TRANS_RELEASE_LOG_RES);
367 xfs_iunlock(dp, XFS_ILOCK_EXCL); 365 xfs_iunlock(dp, XFS_ILOCK_EXCL);
368 366
369 /*
370 * Hit the inode change time.
371 */
372 if (!error && (flags & ATTR_KERNOTIME) == 0) {
373 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
374 }
375 return(error == 0 ? err2 : error); 367 return(error == 0 ? err2 : error);
376 } 368 }
377 369
@@ -396,10 +388,8 @@ xfs_attr_set_int(
396 * bmap_finish() may have committed the last trans and started 388 * bmap_finish() may have committed the last trans and started
397 * a new one. We need the inode to be in all transactions. 389 * a new one. We need the inode to be in all transactions.
398 */ 390 */
399 if (committed) { 391 if (committed)
400 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 392 xfs_trans_ijoin(args.trans, dp);
401 xfs_trans_ihold(args.trans, dp);
402 }
403 393
404 /* 394 /*
405 * Commit the leaf transformation. We'll need another (linked) 395 * Commit the leaf transformation. We'll need another (linked)
@@ -429,6 +419,9 @@ xfs_attr_set_int(
429 xfs_trans_set_sync(args.trans); 419 xfs_trans_set_sync(args.trans);
430 } 420 }
431 421
422 if ((flags & ATTR_KERNOTIME) == 0)
423 xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
424
432 /* 425 /*
433 * Commit the last in the sequence of transactions. 426 * Commit the last in the sequence of transactions.
434 */ 427 */
@@ -436,13 +429,6 @@ xfs_attr_set_int(
436 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 429 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
437 xfs_iunlock(dp, XFS_ILOCK_EXCL); 430 xfs_iunlock(dp, XFS_ILOCK_EXCL);
438 431
439 /*
440 * Hit the inode change time.
441 */
442 if (!error && (flags & ATTR_KERNOTIME) == 0) {
443 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
444 }
445
446 return(error); 432 return(error);
447 433
448out: 434out:
@@ -544,8 +530,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
544 * No need to make quota reservations here. We expect to release some 530 * No need to make quota reservations here. We expect to release some
545 * blocks not allocate in the common case. 531 * blocks not allocate in the common case.
546 */ 532 */
547 xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); 533 xfs_trans_ijoin(args.trans, dp);
548 xfs_trans_ihold(args.trans, dp);
549 534
550 /* 535 /*
551 * Decide on what work routines to call based on the inode size. 536 * Decide on what work routines to call based on the inode size.
@@ -577,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
577 xfs_trans_set_sync(args.trans); 562 xfs_trans_set_sync(args.trans);
578 } 563 }
579 564
565 if ((flags & ATTR_KERNOTIME) == 0)
566 xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
567
580 /* 568 /*
581 * Commit the last in the sequence of transactions. 569 * Commit the last in the sequence of transactions.
582 */ 570 */
@@ -584,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
584 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 572 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
585 xfs_iunlock(dp, XFS_ILOCK_EXCL); 573 xfs_iunlock(dp, XFS_ILOCK_EXCL);
586 574
587 /*
588 * Hit the inode change time.
589 */
590 if (!error && (flags & ATTR_KERNOTIME) == 0) {
591 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
592 }
593
594 return(error); 575 return(error);
595 576
596out: 577out:
@@ -821,8 +802,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
821 * No need to make quota reservations here. We expect to release some 802 * No need to make quota reservations here. We expect to release some
822 * blocks, not allocate, in the common case. 803 * blocks, not allocate, in the common case.
823 */ 804 */
824 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 805 xfs_trans_ijoin(trans, dp);
825 xfs_trans_ihold(trans, dp);
826 806
827 /* 807 /*
828 * Decide on what work routines to call based on the inode size. 808 * Decide on what work routines to call based on the inode size.
@@ -981,10 +961,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
981 * bmap_finish() may have committed the last trans and started 961 * bmap_finish() may have committed the last trans and started
982 * a new one. We need the inode to be in all transactions. 962 * a new one. We need the inode to be in all transactions.
983 */ 963 */
984 if (committed) { 964 if (committed)
985 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 965 xfs_trans_ijoin(args->trans, dp);
986 xfs_trans_ihold(args->trans, dp);
987 }
988 966
989 /* 967 /*
990 * Commit the current trans (including the inode) and start 968 * Commit the current trans (including the inode) and start
@@ -1085,10 +1063,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1085 * and started a new one. We need the inode to be 1063 * and started a new one. We need the inode to be
1086 * in all transactions. 1064 * in all transactions.
1087 */ 1065 */
1088 if (committed) { 1066 if (committed)
1089 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1067 xfs_trans_ijoin(args->trans, dp);
1090 xfs_trans_ihold(args->trans, dp);
1091 }
1092 } else 1068 } else
1093 xfs_da_buf_done(bp); 1069 xfs_da_buf_done(bp);
1094 1070
@@ -1161,10 +1137,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1161 * bmap_finish() may have committed the last trans and started 1137 * bmap_finish() may have committed the last trans and started
1162 * a new one. We need the inode to be in all transactions. 1138 * a new one. We need the inode to be in all transactions.
1163 */ 1139 */
1164 if (committed) { 1140 if (committed)
1165 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1141 xfs_trans_ijoin(args->trans, dp);
1166 xfs_trans_ihold(args->trans, dp);
1167 }
1168 } else 1142 } else
1169 xfs_da_buf_done(bp); 1143 xfs_da_buf_done(bp);
1170 return(0); 1144 return(0);
@@ -1317,10 +1291,8 @@ restart:
1317 * and started a new one. We need the inode to be 1291 * and started a new one. We need the inode to be
1318 * in all transactions. 1292 * in all transactions.
1319 */ 1293 */
1320 if (committed) { 1294 if (committed)
1321 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1295 xfs_trans_ijoin(args->trans, dp);
1322 xfs_trans_ihold(args->trans, dp);
1323 }
1324 1296
1325 /* 1297 /*
1326 * Commit the node conversion and start the next 1298 * Commit the node conversion and start the next
@@ -1356,10 +1328,8 @@ restart:
1356 * bmap_finish() may have committed the last trans and started 1328 * bmap_finish() may have committed the last trans and started
1357 * a new one. We need the inode to be in all transactions. 1329 * a new one. We need the inode to be in all transactions.
1358 */ 1330 */
1359 if (committed) { 1331 if (committed)
1360 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1332 xfs_trans_ijoin(args->trans, dp);
1361 xfs_trans_ihold(args->trans, dp);
1362 }
1363 } else { 1333 } else {
1364 /* 1334 /*
1365 * Addition succeeded, update Btree hashvals. 1335 * Addition succeeded, update Btree hashvals.
@@ -1470,10 +1440,8 @@ restart:
1470 * and started a new one. We need the inode to be 1440 * and started a new one. We need the inode to be
1471 * in all transactions. 1441 * in all transactions.
1472 */ 1442 */
1473 if (committed) { 1443 if (committed)
1474 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1444 xfs_trans_ijoin(args->trans, dp);
1475 xfs_trans_ihold(args->trans, dp);
1476 }
1477 } 1445 }
1478 1446
1479 /* 1447 /*
@@ -1604,10 +1572,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1604 * bmap_finish() may have committed the last trans and started 1572 * bmap_finish() may have committed the last trans and started
1605 * a new one. We need the inode to be in all transactions. 1573 * a new one. We need the inode to be in all transactions.
1606 */ 1574 */
1607 if (committed) { 1575 if (committed)
1608 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1576 xfs_trans_ijoin(args->trans, dp);
1609 xfs_trans_ihold(args->trans, dp);
1610 }
1611 1577
1612 /* 1578 /*
1613 * Commit the Btree join operation and start a new trans. 1579 * Commit the Btree join operation and start a new trans.
@@ -1658,10 +1624,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1658 * and started a new one. We need the inode to be 1624 * and started a new one. We need the inode to be
1659 * in all transactions. 1625 * in all transactions.
1660 */ 1626 */
1661 if (committed) { 1627 if (committed)
1662 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 1628 xfs_trans_ijoin(args->trans, dp);
1663 xfs_trans_ihold(args->trans, dp);
1664 }
1665 } else 1629 } else
1666 xfs_da_brelse(args->trans, bp); 1630 xfs_da_brelse(args->trans, bp);
1667 } 1631 }
@@ -2004,7 +1968,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2004 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, 1968 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2005 args->rmtblkcnt, 1969 args->rmtblkcnt,
2006 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 1970 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2007 NULL, 0, map, &nmap, NULL, NULL); 1971 NULL, 0, map, &nmap, NULL);
2008 if (error) 1972 if (error)
2009 return(error); 1973 return(error);
2010 ASSERT(nmap >= 1); 1974 ASSERT(nmap >= 1);
@@ -2022,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2022 1986
2023 tmp = (valuelen < XFS_BUF_SIZE(bp)) 1987 tmp = (valuelen < XFS_BUF_SIZE(bp))
2024 ? valuelen : XFS_BUF_SIZE(bp); 1988 ? valuelen : XFS_BUF_SIZE(bp);
2025 xfs_biomove(bp, 0, tmp, dst, XBF_READ); 1989 xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
2026 xfs_buf_relse(bp); 1990 xfs_buf_relse(bp);
2027 dst += tmp; 1991 dst += tmp;
2028 valuelen -= tmp; 1992 valuelen -= tmp;
@@ -2083,7 +2047,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2083 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | 2047 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2084 XFS_BMAPI_WRITE, 2048 XFS_BMAPI_WRITE,
2085 args->firstblock, args->total, &map, &nmap, 2049 args->firstblock, args->total, &map, &nmap,
2086 args->flist, NULL); 2050 args->flist);
2087 if (!error) { 2051 if (!error) {
2088 error = xfs_bmap_finish(&args->trans, args->flist, 2052 error = xfs_bmap_finish(&args->trans, args->flist,
2089 &committed); 2053 &committed);
@@ -2099,10 +2063,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2099 * bmap_finish() may have committed the last trans and started 2063 * bmap_finish() may have committed the last trans and started
2100 * a new one. We need the inode to be in all transactions. 2064 * a new one. We need the inode to be in all transactions.
2101 */ 2065 */
2102 if (committed) { 2066 if (committed)
2103 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); 2067 xfs_trans_ijoin(args->trans, dp);
2104 xfs_trans_ihold(args->trans, dp);
2105 }
2106 2068
2107 ASSERT(nmap == 1); 2069 ASSERT(nmap == 1);
2108 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 2070 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2136,7 +2098,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2136 args->rmtblkcnt, 2098 args->rmtblkcnt,
2137 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2099 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2138 args->firstblock, 0, &map, &nmap, 2100 args->firstblock, 0, &map, &nmap,
2139 NULL, NULL); 2101 NULL);
2140 if (error) { 2102 if (error) {
2141 return(error); 2103 return(error);
2142 } 2104 }
@@ -2154,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2154 2116
2155 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : 2117 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2156 XFS_BUF_SIZE(bp); 2118 XFS_BUF_SIZE(bp);
2157 xfs_biomove(bp, 0, tmp, src, XBF_WRITE); 2119 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
2158 if (tmp < XFS_BUF_SIZE(bp)) 2120 if (tmp < XFS_BUF_SIZE(bp))
2159 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); 2121 xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2160 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ 2122 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2161 return (error); 2123 return (error);
2162 } 2124 }
@@ -2201,7 +2163,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2201 args->rmtblkcnt, 2163 args->rmtblkcnt,
2202 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2164 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2203 args->firstblock, 0, &map, &nmap, 2165 args->firstblock, 0, &map, &nmap,
2204 args->flist, NULL); 2166 args->flist);
2205 if (error) { 2167 if (error) {
2206 return(error); 2168 return(error);
2207 } 2169 }
@@ -2239,7 +2201,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2239 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 2201 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2240 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2202 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2241 1, args->firstblock, args->flist, 2203 1, args->firstblock, args->flist,
2242 NULL, &done); 2204 &done);
2243 if (!error) { 2205 if (!error) {
2244 error = xfs_bmap_finish(&args->trans, args->flist, 2206 error = xfs_bmap_finish(&args->trans, args->flist,
2245 &committed); 2207 &committed);
@@ -2255,10 +2217,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2255 * bmap_finish() may have committed the last trans and started 2217 * bmap_finish() may have committed the last trans and started
2256 * a new one. We need the inode to be in all transactions. 2218 * a new one. We need the inode to be in all transactions.
2257 */ 2219 */
2258 if (committed) { 2220 if (committed)
2259 xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL); 2221 xfs_trans_ijoin(args->trans, args->dp);
2260 xfs_trans_ihold(args->trans, args->dp);
2261 }
2262 2222
2263 /* 2223 /*
2264 * Close out trans and start the next one in the chain. 2224 * Close out trans and start the next one in the chain.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a90ce74fc256..a6cff8edcdb6 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -33,7 +31,6 @@
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_alloc.h" 32#include "xfs_alloc.h"
35#include "xfs_btree.h" 33#include "xfs_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -2931,7 +2928,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2931 nmap = 1; 2928 nmap = 1;
2932 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, 2929 error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
2933 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 2930 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2934 NULL, 0, &map, &nmap, NULL, NULL); 2931 NULL, 0, &map, &nmap, NULL);
2935 if (error) { 2932 if (error) {
2936 return(error); 2933 return(error);
2937 } 2934 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 99587ded043f..8abd12e32e13 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -30,13 +30,10 @@
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h" 33#include "xfs_dinode.h"
35#include "xfs_inode.h" 34#include "xfs_inode.h"
36#include "xfs_btree.h" 35#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h" 36#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h" 37#include "xfs_itable.h"
41#include "xfs_dir2_data.h" 38#include "xfs_dir2_data.h"
42#include "xfs_dir2_leaf.h" 39#include "xfs_dir2_leaf.h"
@@ -104,7 +101,6 @@ xfs_bmap_add_extent(
104 xfs_fsblock_t *first, /* pointer to firstblock variable */ 101 xfs_fsblock_t *first, /* pointer to firstblock variable */
105 xfs_bmap_free_t *flist, /* list of extents to be freed */ 102 xfs_bmap_free_t *flist, /* list of extents to be freed */
106 int *logflagsp, /* inode logging flags */ 103 int *logflagsp, /* inode logging flags */
107 xfs_extdelta_t *delta, /* Change made to incore extents */
108 int whichfork, /* data or attr fork */ 104 int whichfork, /* data or attr fork */
109 int rsvd); /* OK to allocate reserved blocks */ 105 int rsvd); /* OK to allocate reserved blocks */
110 106
@@ -122,7 +118,6 @@ xfs_bmap_add_extent_delay_real(
122 xfs_fsblock_t *first, /* pointer to firstblock variable */ 118 xfs_fsblock_t *first, /* pointer to firstblock variable */
123 xfs_bmap_free_t *flist, /* list of extents to be freed */ 119 xfs_bmap_free_t *flist, /* list of extents to be freed */
124 int *logflagsp, /* inode logging flags */ 120 int *logflagsp, /* inode logging flags */
125 xfs_extdelta_t *delta, /* Change made to incore extents */
126 int rsvd); /* OK to allocate reserved blocks */ 121 int rsvd); /* OK to allocate reserved blocks */
127 122
128/* 123/*
@@ -135,7 +130,6 @@ xfs_bmap_add_extent_hole_delay(
135 xfs_extnum_t idx, /* extent number to update/insert */ 130 xfs_extnum_t idx, /* extent number to update/insert */
136 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 131 xfs_bmbt_irec_t *new, /* new data to add to file extents */
137 int *logflagsp,/* inode logging flags */ 132 int *logflagsp,/* inode logging flags */
138 xfs_extdelta_t *delta, /* Change made to incore extents */
139 int rsvd); /* OK to allocate reserved blocks */ 133 int rsvd); /* OK to allocate reserved blocks */
140 134
141/* 135/*
@@ -149,7 +143,6 @@ xfs_bmap_add_extent_hole_real(
149 xfs_btree_cur_t *cur, /* if null, not a btree */ 143 xfs_btree_cur_t *cur, /* if null, not a btree */
150 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 144 xfs_bmbt_irec_t *new, /* new data to add to file extents */
151 int *logflagsp, /* inode logging flags */ 145 int *logflagsp, /* inode logging flags */
152 xfs_extdelta_t *delta, /* Change made to incore extents */
153 int whichfork); /* data or attr fork */ 146 int whichfork); /* data or attr fork */
154 147
155/* 148/*
@@ -162,8 +155,7 @@ xfs_bmap_add_extent_unwritten_real(
162 xfs_extnum_t idx, /* extent number to update/insert */ 155 xfs_extnum_t idx, /* extent number to update/insert */
163 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 156 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
164 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 157 xfs_bmbt_irec_t *new, /* new data to add to file extents */
165 int *logflagsp, /* inode logging flags */ 158 int *logflagsp); /* inode logging flags */
166 xfs_extdelta_t *delta); /* Change made to incore extents */
167 159
168/* 160/*
169 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 161 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -200,7 +192,6 @@ xfs_bmap_del_extent(
200 xfs_btree_cur_t *cur, /* if null, not a btree */ 192 xfs_btree_cur_t *cur, /* if null, not a btree */
201 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 193 xfs_bmbt_irec_t *new, /* new data to add to file extents */
202 int *logflagsp,/* inode logging flags */ 194 int *logflagsp,/* inode logging flags */
203 xfs_extdelta_t *delta, /* Change made to incore extents */
204 int whichfork, /* data or attr fork */ 195 int whichfork, /* data or attr fork */
205 int rsvd); /* OK to allocate reserved blocks */ 196 int rsvd); /* OK to allocate reserved blocks */
206 197
@@ -489,7 +480,6 @@ xfs_bmap_add_extent(
489 xfs_fsblock_t *first, /* pointer to firstblock variable */ 480 xfs_fsblock_t *first, /* pointer to firstblock variable */
490 xfs_bmap_free_t *flist, /* list of extents to be freed */ 481 xfs_bmap_free_t *flist, /* list of extents to be freed */
491 int *logflagsp, /* inode logging flags */ 482 int *logflagsp, /* inode logging flags */
492 xfs_extdelta_t *delta, /* Change made to incore extents */
493 int whichfork, /* data or attr fork */ 483 int whichfork, /* data or attr fork */
494 int rsvd) /* OK to use reserved data blocks */ 484 int rsvd) /* OK to use reserved data blocks */
495{ 485{
@@ -524,15 +514,6 @@ xfs_bmap_add_extent(
524 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 514 logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
525 } else 515 } else
526 logflags = 0; 516 logflags = 0;
527 /* DELTA: single new extent */
528 if (delta) {
529 if (delta->xed_startoff > new->br_startoff)
530 delta->xed_startoff = new->br_startoff;
531 if (delta->xed_blockcount <
532 new->br_startoff + new->br_blockcount)
533 delta->xed_blockcount = new->br_startoff +
534 new->br_blockcount;
535 }
536 } 517 }
537 /* 518 /*
538 * Any kind of new delayed allocation goes here. 519 * Any kind of new delayed allocation goes here.
@@ -542,7 +523,7 @@ xfs_bmap_add_extent(
542 ASSERT((cur->bc_private.b.flags & 523 ASSERT((cur->bc_private.b.flags &
543 XFS_BTCUR_BPRV_WASDEL) == 0); 524 XFS_BTCUR_BPRV_WASDEL) == 0);
544 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, 525 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
545 &logflags, delta, rsvd))) 526 &logflags, rsvd)))
546 goto done; 527 goto done;
547 } 528 }
548 /* 529 /*
@@ -553,7 +534,7 @@ xfs_bmap_add_extent(
553 ASSERT((cur->bc_private.b.flags & 534 ASSERT((cur->bc_private.b.flags &
554 XFS_BTCUR_BPRV_WASDEL) == 0); 535 XFS_BTCUR_BPRV_WASDEL) == 0);
555 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, 536 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
556 &logflags, delta, whichfork))) 537 &logflags, whichfork)))
557 goto done; 538 goto done;
558 } else { 539 } else {
559 xfs_bmbt_irec_t prev; /* old extent at offset idx */ 540 xfs_bmbt_irec_t prev; /* old extent at offset idx */
@@ -578,17 +559,17 @@ xfs_bmap_add_extent(
578 XFS_BTCUR_BPRV_WASDEL); 559 XFS_BTCUR_BPRV_WASDEL);
579 if ((error = xfs_bmap_add_extent_delay_real(ip, 560 if ((error = xfs_bmap_add_extent_delay_real(ip,
580 idx, &cur, new, &da_new, first, flist, 561 idx, &cur, new, &da_new, first, flist,
581 &logflags, delta, rsvd))) 562 &logflags, rsvd)))
582 goto done; 563 goto done;
583 } else if (new->br_state == XFS_EXT_NORM) { 564 } else if (new->br_state == XFS_EXT_NORM) {
584 ASSERT(new->br_state == XFS_EXT_NORM); 565 ASSERT(new->br_state == XFS_EXT_NORM);
585 if ((error = xfs_bmap_add_extent_unwritten_real( 566 if ((error = xfs_bmap_add_extent_unwritten_real(
586 ip, idx, &cur, new, &logflags, delta))) 567 ip, idx, &cur, new, &logflags)))
587 goto done; 568 goto done;
588 } else { 569 } else {
589 ASSERT(new->br_state == XFS_EXT_UNWRITTEN); 570 ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
590 if ((error = xfs_bmap_add_extent_unwritten_real( 571 if ((error = xfs_bmap_add_extent_unwritten_real(
591 ip, idx, &cur, new, &logflags, delta))) 572 ip, idx, &cur, new, &logflags)))
592 goto done; 573 goto done;
593 } 574 }
594 ASSERT(*curp == cur || *curp == NULL); 575 ASSERT(*curp == cur || *curp == NULL);
@@ -601,7 +582,7 @@ xfs_bmap_add_extent(
601 ASSERT((cur->bc_private.b.flags & 582 ASSERT((cur->bc_private.b.flags &
602 XFS_BTCUR_BPRV_WASDEL) == 0); 583 XFS_BTCUR_BPRV_WASDEL) == 0);
603 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, 584 if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
604 new, &logflags, delta, whichfork))) 585 new, &logflags, whichfork)))
605 goto done; 586 goto done;
606 } 587 }
607 } 588 }
@@ -633,7 +614,7 @@ xfs_bmap_add_extent(
633 nblks += cur->bc_private.b.allocated; 614 nblks += cur->bc_private.b.allocated;
634 ASSERT(nblks <= da_old); 615 ASSERT(nblks <= da_old);
635 if (nblks < da_old) 616 if (nblks < da_old)
636 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 617 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
637 (int64_t)(da_old - nblks), rsvd); 618 (int64_t)(da_old - nblks), rsvd);
638 } 619 }
639 /* 620 /*
@@ -666,7 +647,6 @@ xfs_bmap_add_extent_delay_real(
666 xfs_fsblock_t *first, /* pointer to firstblock variable */ 647 xfs_fsblock_t *first, /* pointer to firstblock variable */
667 xfs_bmap_free_t *flist, /* list of extents to be freed */ 648 xfs_bmap_free_t *flist, /* list of extents to be freed */
668 int *logflagsp, /* inode logging flags */ 649 int *logflagsp, /* inode logging flags */
669 xfs_extdelta_t *delta, /* Change made to incore extents */
670 int rsvd) /* OK to use reserved data block allocation */ 650 int rsvd) /* OK to use reserved data block allocation */
671{ 651{
672 xfs_btree_cur_t *cur; /* btree cursor */ 652 xfs_btree_cur_t *cur; /* btree cursor */
@@ -797,11 +777,6 @@ xfs_bmap_add_extent_delay_real(
797 goto done; 777 goto done;
798 } 778 }
799 *dnew = 0; 779 *dnew = 0;
800 /* DELTA: Three in-core extents are replaced by one. */
801 temp = LEFT.br_startoff;
802 temp2 = LEFT.br_blockcount +
803 PREV.br_blockcount +
804 RIGHT.br_blockcount;
805 break; 780 break;
806 781
807 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 782 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -832,10 +807,6 @@ xfs_bmap_add_extent_delay_real(
832 goto done; 807 goto done;
833 } 808 }
834 *dnew = 0; 809 *dnew = 0;
835 /* DELTA: Two in-core extents are replaced by one. */
836 temp = LEFT.br_startoff;
837 temp2 = LEFT.br_blockcount +
838 PREV.br_blockcount;
839 break; 810 break;
840 811
841 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 812 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -867,10 +838,6 @@ xfs_bmap_add_extent_delay_real(
867 goto done; 838 goto done;
868 } 839 }
869 *dnew = 0; 840 *dnew = 0;
870 /* DELTA: Two in-core extents are replaced by one. */
871 temp = PREV.br_startoff;
872 temp2 = PREV.br_blockcount +
873 RIGHT.br_blockcount;
874 break; 841 break;
875 842
876 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 843 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -900,9 +867,6 @@ xfs_bmap_add_extent_delay_real(
900 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 867 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
901 } 868 }
902 *dnew = 0; 869 *dnew = 0;
903 /* DELTA: The in-core extent described by new changed type. */
904 temp = new->br_startoff;
905 temp2 = new->br_blockcount;
906 break; 870 break;
907 871
908 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 872 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -942,10 +906,6 @@ xfs_bmap_add_extent_delay_real(
942 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 906 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
943 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 907 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
944 *dnew = temp; 908 *dnew = temp;
945 /* DELTA: The boundary between two in-core extents moved. */
946 temp = LEFT.br_startoff;
947 temp2 = LEFT.br_blockcount +
948 PREV.br_blockcount;
949 break; 909 break;
950 910
951 case BMAP_LEFT_FILLING: 911 case BMAP_LEFT_FILLING:
@@ -990,9 +950,6 @@ xfs_bmap_add_extent_delay_real(
990 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 950 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
991 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); 951 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
992 *dnew = temp; 952 *dnew = temp;
993 /* DELTA: One in-core extent is split in two. */
994 temp = PREV.br_startoff;
995 temp2 = PREV.br_blockcount;
996 break; 953 break;
997 954
998 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 955 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1031,10 +988,6 @@ xfs_bmap_add_extent_delay_real(
1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 988 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 989 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1033 *dnew = temp; 990 *dnew = temp;
1034 /* DELTA: The boundary between two in-core extents moved. */
1035 temp = PREV.br_startoff;
1036 temp2 = PREV.br_blockcount +
1037 RIGHT.br_blockcount;
1038 break; 991 break;
1039 992
1040 case BMAP_RIGHT_FILLING: 993 case BMAP_RIGHT_FILLING:
@@ -1078,9 +1031,6 @@ xfs_bmap_add_extent_delay_real(
1078 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1079 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1080 *dnew = temp; 1033 *dnew = temp;
1081 /* DELTA: One in-core extent is split in two. */
1082 temp = PREV.br_startoff;
1083 temp2 = PREV.br_blockcount;
1084 break; 1034 break;
1085 1035
1086 case 0: 1036 case 0:
@@ -1129,7 +1079,8 @@ xfs_bmap_add_extent_delay_real(
1129 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 1079 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
1130 (cur ? cur->bc_private.b.allocated : 0)); 1080 (cur ? cur->bc_private.b.allocated : 0));
1131 if (diff > 0 && 1081 if (diff > 0 &&
1132 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { 1082 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
1083 -((int64_t)diff), rsvd)) {
1133 /* 1084 /*
1134 * Ick gross gag me with a spoon. 1085 * Ick gross gag me with a spoon.
1135 */ 1086 */
@@ -1139,16 +1090,18 @@ xfs_bmap_add_extent_delay_real(
1139 temp--; 1090 temp--;
1140 diff--; 1091 diff--;
1141 if (!diff || 1092 if (!diff ||
1142 !xfs_mod_incore_sb(ip->i_mount, 1093 !xfs_icsb_modify_counters(ip->i_mount,
1143 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) 1094 XFS_SBS_FDBLOCKS,
1095 -((int64_t)diff), rsvd))
1144 break; 1096 break;
1145 } 1097 }
1146 if (temp2) { 1098 if (temp2) {
1147 temp2--; 1099 temp2--;
1148 diff--; 1100 diff--;
1149 if (!diff || 1101 if (!diff ||
1150 !xfs_mod_incore_sb(ip->i_mount, 1102 !xfs_icsb_modify_counters(ip->i_mount,
1151 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) 1103 XFS_SBS_FDBLOCKS,
1104 -((int64_t)diff), rsvd))
1152 break; 1105 break;
1153 } 1106 }
1154 } 1107 }
@@ -1161,9 +1114,6 @@ xfs_bmap_add_extent_delay_real(
1161 nullstartblock((int)temp2)); 1114 nullstartblock((int)temp2));
1162 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); 1115 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
1163 *dnew = temp + temp2; 1116 *dnew = temp + temp2;
1164 /* DELTA: One in-core extent is split in three. */
1165 temp = PREV.br_startoff;
1166 temp2 = PREV.br_blockcount;
1167 break; 1117 break;
1168 1118
1169 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1119 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1179,13 +1129,6 @@ xfs_bmap_add_extent_delay_real(
1179 ASSERT(0); 1129 ASSERT(0);
1180 } 1130 }
1181 *curp = cur; 1131 *curp = cur;
1182 if (delta) {
1183 temp2 += temp;
1184 if (delta->xed_startoff > temp)
1185 delta->xed_startoff = temp;
1186 if (delta->xed_blockcount < temp2)
1187 delta->xed_blockcount = temp2;
1188 }
1189done: 1132done:
1190 *logflagsp = rval; 1133 *logflagsp = rval;
1191 return error; 1134 return error;
@@ -1204,8 +1147,7 @@ xfs_bmap_add_extent_unwritten_real(
1204 xfs_extnum_t idx, /* extent number to update/insert */ 1147 xfs_extnum_t idx, /* extent number to update/insert */
1205 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 1148 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
1206 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1149 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1207 int *logflagsp, /* inode logging flags */ 1150 int *logflagsp) /* inode logging flags */
1208 xfs_extdelta_t *delta) /* Change made to incore extents */
1209{ 1151{
1210 xfs_btree_cur_t *cur; /* btree cursor */ 1152 xfs_btree_cur_t *cur; /* btree cursor */
1211 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ 1153 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
@@ -1219,8 +1161,6 @@ xfs_bmap_add_extent_unwritten_real(
1219 /* left is 0, right is 1, prev is 2 */ 1161 /* left is 0, right is 1, prev is 2 */
1220 int rval=0; /* return value (logging flags) */ 1162 int rval=0; /* return value (logging flags) */
1221 int state = 0;/* state bits, accessed thru macros */ 1163 int state = 0;/* state bits, accessed thru macros */
1222 xfs_filblks_t temp=0;
1223 xfs_filblks_t temp2=0;
1224 1164
1225#define LEFT r[0] 1165#define LEFT r[0]
1226#define RIGHT r[1] 1166#define RIGHT r[1]
@@ -1341,11 +1281,6 @@ xfs_bmap_add_extent_unwritten_real(
1341 RIGHT.br_blockcount, LEFT.br_state))) 1281 RIGHT.br_blockcount, LEFT.br_state)))
1342 goto done; 1282 goto done;
1343 } 1283 }
1344 /* DELTA: Three in-core extents are replaced by one. */
1345 temp = LEFT.br_startoff;
1346 temp2 = LEFT.br_blockcount +
1347 PREV.br_blockcount +
1348 RIGHT.br_blockcount;
1349 break; 1284 break;
1350 1285
1351 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 1286 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1382,10 +1317,6 @@ xfs_bmap_add_extent_unwritten_real(
1382 LEFT.br_state))) 1317 LEFT.br_state)))
1383 goto done; 1318 goto done;
1384 } 1319 }
1385 /* DELTA: Two in-core extents are replaced by one. */
1386 temp = LEFT.br_startoff;
1387 temp2 = LEFT.br_blockcount +
1388 PREV.br_blockcount;
1389 break; 1320 break;
1390 1321
1391 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1322 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1422,10 +1353,6 @@ xfs_bmap_add_extent_unwritten_real(
1422 newext))) 1353 newext)))
1423 goto done; 1354 goto done;
1424 } 1355 }
1425 /* DELTA: Two in-core extents are replaced by one. */
1426 temp = PREV.br_startoff;
1427 temp2 = PREV.br_blockcount +
1428 RIGHT.br_blockcount;
1429 break; 1356 break;
1430 1357
1431 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 1358 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1453,9 +1380,6 @@ xfs_bmap_add_extent_unwritten_real(
1453 newext))) 1380 newext)))
1454 goto done; 1381 goto done;
1455 } 1382 }
1456 /* DELTA: The in-core extent described by new changed type. */
1457 temp = new->br_startoff;
1458 temp2 = new->br_blockcount;
1459 break; 1383 break;
1460 1384
1461 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 1385 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1501,10 +1425,6 @@ xfs_bmap_add_extent_unwritten_real(
1501 LEFT.br_state)) 1425 LEFT.br_state))
1502 goto done; 1426 goto done;
1503 } 1427 }
1504 /* DELTA: The boundary between two in-core extents moved. */
1505 temp = LEFT.br_startoff;
1506 temp2 = LEFT.br_blockcount +
1507 PREV.br_blockcount;
1508 break; 1428 break;
1509 1429
1510 case BMAP_LEFT_FILLING: 1430 case BMAP_LEFT_FILLING:
@@ -1544,9 +1464,6 @@ xfs_bmap_add_extent_unwritten_real(
1544 goto done; 1464 goto done;
1545 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1465 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1546 } 1466 }
1547 /* DELTA: One in-core extent is split in two. */
1548 temp = PREV.br_startoff;
1549 temp2 = PREV.br_blockcount;
1550 break; 1467 break;
1551 1468
1552 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1469 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1587,10 +1504,6 @@ xfs_bmap_add_extent_unwritten_real(
1587 newext))) 1504 newext)))
1588 goto done; 1505 goto done;
1589 } 1506 }
1590 /* DELTA: The boundary between two in-core extents moved. */
1591 temp = PREV.br_startoff;
1592 temp2 = PREV.br_blockcount +
1593 RIGHT.br_blockcount;
1594 break; 1507 break;
1595 1508
1596 case BMAP_RIGHT_FILLING: 1509 case BMAP_RIGHT_FILLING:
@@ -1630,9 +1543,6 @@ xfs_bmap_add_extent_unwritten_real(
1630 goto done; 1543 goto done;
1631 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1544 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1632 } 1545 }
1633 /* DELTA: One in-core extent is split in two. */
1634 temp = PREV.br_startoff;
1635 temp2 = PREV.br_blockcount;
1636 break; 1546 break;
1637 1547
1638 case 0: 1548 case 0:
@@ -1692,9 +1602,6 @@ xfs_bmap_add_extent_unwritten_real(
1692 goto done; 1602 goto done;
1693 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1603 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1694 } 1604 }
1695 /* DELTA: One in-core extent is split in three. */
1696 temp = PREV.br_startoff;
1697 temp2 = PREV.br_blockcount;
1698 break; 1605 break;
1699 1606
1700 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1607 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1710,13 +1617,6 @@ xfs_bmap_add_extent_unwritten_real(
1710 ASSERT(0); 1617 ASSERT(0);
1711 } 1618 }
1712 *curp = cur; 1619 *curp = cur;
1713 if (delta) {
1714 temp2 += temp;
1715 if (delta->xed_startoff > temp)
1716 delta->xed_startoff = temp;
1717 if (delta->xed_blockcount < temp2)
1718 delta->xed_blockcount = temp2;
1719 }
1720done: 1620done:
1721 *logflagsp = rval; 1621 *logflagsp = rval;
1722 return error; 1622 return error;
@@ -1736,7 +1636,6 @@ xfs_bmap_add_extent_hole_delay(
1736 xfs_extnum_t idx, /* extent number to update/insert */ 1636 xfs_extnum_t idx, /* extent number to update/insert */
1737 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1637 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1738 int *logflagsp, /* inode logging flags */ 1638 int *logflagsp, /* inode logging flags */
1739 xfs_extdelta_t *delta, /* Change made to incore extents */
1740 int rsvd) /* OK to allocate reserved blocks */ 1639 int rsvd) /* OK to allocate reserved blocks */
1741{ 1640{
1742 xfs_bmbt_rec_host_t *ep; /* extent record for idx */ 1641 xfs_bmbt_rec_host_t *ep; /* extent record for idx */
@@ -1747,7 +1646,6 @@ xfs_bmap_add_extent_hole_delay(
1747 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1646 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1748 int state; /* state bits, accessed thru macros */ 1647 int state; /* state bits, accessed thru macros */
1749 xfs_filblks_t temp=0; /* temp for indirect calculations */ 1648 xfs_filblks_t temp=0; /* temp for indirect calculations */
1750 xfs_filblks_t temp2=0;
1751 1649
1752 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1650 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1753 ep = xfs_iext_get_ext(ifp, idx); 1651 ep = xfs_iext_get_ext(ifp, idx);
@@ -1819,9 +1717,6 @@ xfs_bmap_add_extent_hole_delay(
1819 1717
1820 xfs_iext_remove(ip, idx, 1, state); 1718 xfs_iext_remove(ip, idx, 1, state);
1821 ip->i_df.if_lastex = idx - 1; 1719 ip->i_df.if_lastex = idx - 1;
1822 /* DELTA: Two in-core extents were replaced by one. */
1823 temp2 = temp;
1824 temp = left.br_startoff;
1825 break; 1720 break;
1826 1721
1827 case BMAP_LEFT_CONTIG: 1722 case BMAP_LEFT_CONTIG:
@@ -1841,9 +1736,6 @@ xfs_bmap_add_extent_hole_delay(
1841 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); 1736 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1842 1737
1843 ip->i_df.if_lastex = idx - 1; 1738 ip->i_df.if_lastex = idx - 1;
1844 /* DELTA: One in-core extent grew into a hole. */
1845 temp2 = temp;
1846 temp = left.br_startoff;
1847 break; 1739 break;
1848 1740
1849 case BMAP_RIGHT_CONTIG: 1741 case BMAP_RIGHT_CONTIG:
@@ -1862,9 +1754,6 @@ xfs_bmap_add_extent_hole_delay(
1862 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 1754 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1863 1755
1864 ip->i_df.if_lastex = idx; 1756 ip->i_df.if_lastex = idx;
1865 /* DELTA: One in-core extent grew into a hole. */
1866 temp2 = temp;
1867 temp = new->br_startoff;
1868 break; 1757 break;
1869 1758
1870 case 0: 1759 case 0:
@@ -1876,26 +1765,16 @@ xfs_bmap_add_extent_hole_delay(
1876 oldlen = newlen = 0; 1765 oldlen = newlen = 0;
1877 xfs_iext_insert(ip, idx, 1, new, state); 1766 xfs_iext_insert(ip, idx, 1, new, state);
1878 ip->i_df.if_lastex = idx; 1767 ip->i_df.if_lastex = idx;
1879 /* DELTA: A new in-core extent was added in a hole. */
1880 temp2 = new->br_blockcount;
1881 temp = new->br_startoff;
1882 break; 1768 break;
1883 } 1769 }
1884 if (oldlen != newlen) { 1770 if (oldlen != newlen) {
1885 ASSERT(oldlen > newlen); 1771 ASSERT(oldlen > newlen);
1886 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 1772 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
1887 (int64_t)(oldlen - newlen), rsvd); 1773 (int64_t)(oldlen - newlen), rsvd);
1888 /* 1774 /*
1889 * Nothing to do for disk quota accounting here. 1775 * Nothing to do for disk quota accounting here.
1890 */ 1776 */
1891 } 1777 }
1892 if (delta) {
1893 temp2 += temp;
1894 if (delta->xed_startoff > temp)
1895 delta->xed_startoff = temp;
1896 if (delta->xed_blockcount < temp2)
1897 delta->xed_blockcount = temp2;
1898 }
1899 *logflagsp = 0; 1778 *logflagsp = 0;
1900 return 0; 1779 return 0;
1901} 1780}
@@ -1911,7 +1790,6 @@ xfs_bmap_add_extent_hole_real(
1911 xfs_btree_cur_t *cur, /* if null, not a btree */ 1790 xfs_btree_cur_t *cur, /* if null, not a btree */
1912 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1791 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1913 int *logflagsp, /* inode logging flags */ 1792 int *logflagsp, /* inode logging flags */
1914 xfs_extdelta_t *delta, /* Change made to incore extents */
1915 int whichfork) /* data or attr fork */ 1793 int whichfork) /* data or attr fork */
1916{ 1794{
1917 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ 1795 xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */
@@ -1922,8 +1800,6 @@ xfs_bmap_add_extent_hole_real(
1922 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 1800 xfs_bmbt_irec_t right; /* right neighbor extent entry */
1923 int rval=0; /* return value (logging flags) */ 1801 int rval=0; /* return value (logging flags) */
1924 int state; /* state bits, accessed thru macros */ 1802 int state; /* state bits, accessed thru macros */
1925 xfs_filblks_t temp=0;
1926 xfs_filblks_t temp2=0;
1927 1803
1928 ifp = XFS_IFORK_PTR(ip, whichfork); 1804 ifp = XFS_IFORK_PTR(ip, whichfork);
1929 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 1805 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
@@ -2020,11 +1896,6 @@ xfs_bmap_add_extent_hole_real(
2020 left.br_state))) 1896 left.br_state)))
2021 goto done; 1897 goto done;
2022 } 1898 }
2023 /* DELTA: Two in-core extents were replaced by one. */
2024 temp = left.br_startoff;
2025 temp2 = left.br_blockcount +
2026 new->br_blockcount +
2027 right.br_blockcount;
2028 break; 1899 break;
2029 1900
2030 case BMAP_LEFT_CONTIG: 1901 case BMAP_LEFT_CONTIG:
@@ -2056,10 +1927,6 @@ xfs_bmap_add_extent_hole_real(
2056 left.br_state))) 1927 left.br_state)))
2057 goto done; 1928 goto done;
2058 } 1929 }
2059 /* DELTA: One in-core extent grew. */
2060 temp = left.br_startoff;
2061 temp2 = left.br_blockcount +
2062 new->br_blockcount;
2063 break; 1930 break;
2064 1931
2065 case BMAP_RIGHT_CONTIG: 1932 case BMAP_RIGHT_CONTIG:
@@ -2092,10 +1959,6 @@ xfs_bmap_add_extent_hole_real(
2092 right.br_state))) 1959 right.br_state)))
2093 goto done; 1960 goto done;
2094 } 1961 }
2095 /* DELTA: One in-core extent grew. */
2096 temp = new->br_startoff;
2097 temp2 = new->br_blockcount +
2098 right.br_blockcount;
2099 break; 1962 break;
2100 1963
2101 case 0: 1964 case 0:
@@ -2123,18 +1986,8 @@ xfs_bmap_add_extent_hole_real(
2123 goto done; 1986 goto done;
2124 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1987 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2125 } 1988 }
2126 /* DELTA: A new extent was added in a hole. */
2127 temp = new->br_startoff;
2128 temp2 = new->br_blockcount;
2129 break; 1989 break;
2130 } 1990 }
2131 if (delta) {
2132 temp2 += temp;
2133 if (delta->xed_startoff > temp)
2134 delta->xed_startoff = temp;
2135 if (delta->xed_blockcount < temp2)
2136 delta->xed_blockcount = temp2;
2137 }
2138done: 1991done:
2139 *logflagsp = rval; 1992 *logflagsp = rval;
2140 return error; 1993 return error;
@@ -2959,7 +2812,6 @@ xfs_bmap_del_extent(
2959 xfs_btree_cur_t *cur, /* if null, not a btree */ 2812 xfs_btree_cur_t *cur, /* if null, not a btree */
2960 xfs_bmbt_irec_t *del, /* data to remove from extents */ 2813 xfs_bmbt_irec_t *del, /* data to remove from extents */
2961 int *logflagsp, /* inode logging flags */ 2814 int *logflagsp, /* inode logging flags */
2962 xfs_extdelta_t *delta, /* Change made to incore extents */
2963 int whichfork, /* data or attr fork */ 2815 int whichfork, /* data or attr fork */
2964 int rsvd) /* OK to allocate reserved blocks */ 2816 int rsvd) /* OK to allocate reserved blocks */
2965{ 2817{
@@ -3262,16 +3114,9 @@ xfs_bmap_del_extent(
3262 * Nothing to do for disk quota accounting here. 3114 * Nothing to do for disk quota accounting here.
3263 */ 3115 */
3264 ASSERT(da_old >= da_new); 3116 ASSERT(da_old >= da_new);
3265 if (da_old > da_new) 3117 if (da_old > da_new) {
3266 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), 3118 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
3267 rsvd); 3119 (int64_t)(da_old - da_new), rsvd);
3268 if (delta) {
3269 /* DELTA: report the original extent. */
3270 if (delta->xed_startoff > got.br_startoff)
3271 delta->xed_startoff = got.br_startoff;
3272 if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
3273 delta->xed_blockcount = got.br_startoff +
3274 got.br_blockcount;
3275 } 3120 }
3276done: 3121done:
3277 *logflagsp = flags; 3122 *logflagsp = flags;
@@ -3754,9 +3599,10 @@ xfs_bmap_add_attrfork(
3754 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 3599 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3755 } 3600 }
3756 ASSERT(ip->i_d.di_anextents == 0); 3601 ASSERT(ip->i_d.di_anextents == 0);
3757 IHOLD(ip); 3602
3758 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3603 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
3759 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3604 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3605
3760 switch (ip->i_d.di_format) { 3606 switch (ip->i_d.di_format) {
3761 case XFS_DINODE_FMT_DEV: 3607 case XFS_DINODE_FMT_DEV:
3762 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; 3608 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
@@ -4483,8 +4329,7 @@ xfs_bmapi(
4483 xfs_extlen_t total, /* total blocks needed */ 4329 xfs_extlen_t total, /* total blocks needed */
4484 xfs_bmbt_irec_t *mval, /* output: map values */ 4330 xfs_bmbt_irec_t *mval, /* output: map values */
4485 int *nmap, /* i/o: mval size/count */ 4331 int *nmap, /* i/o: mval size/count */
4486 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4332 xfs_bmap_free_t *flist) /* i/o: list extents to free */
4487 xfs_extdelta_t *delta) /* o: change made to incore extents */
4488{ 4333{
4489 xfs_fsblock_t abno; /* allocated block number */ 4334 xfs_fsblock_t abno; /* allocated block number */
4490 xfs_extlen_t alen; /* allocated extent length */ 4335 xfs_extlen_t alen; /* allocated extent length */
@@ -4596,10 +4441,7 @@ xfs_bmapi(
4596 end = bno + len; 4441 end = bno + len;
4597 obno = bno; 4442 obno = bno;
4598 bma.ip = NULL; 4443 bma.ip = NULL;
4599 if (delta) { 4444
4600 delta->xed_startoff = NULLFILEOFF;
4601 delta->xed_blockcount = 0;
4602 }
4603 while (bno < end && n < *nmap) { 4445 while (bno < end && n < *nmap) {
4604 /* 4446 /*
4605 * Reading past eof, act as though there's a hole 4447 * Reading past eof, act as though there's a hole
@@ -4620,19 +4462,13 @@ xfs_bmapi(
4620 * allocate the stuff asked for in this bmap call 4462 * allocate the stuff asked for in this bmap call
4621 * but that wouldn't be as good. 4463 * but that wouldn't be as good.
4622 */ 4464 */
4623 if (wasdelay && !(flags & XFS_BMAPI_EXACT)) { 4465 if (wasdelay) {
4624 alen = (xfs_extlen_t)got.br_blockcount; 4466 alen = (xfs_extlen_t)got.br_blockcount;
4625 aoff = got.br_startoff; 4467 aoff = got.br_startoff;
4626 if (lastx != NULLEXTNUM && lastx) { 4468 if (lastx != NULLEXTNUM && lastx) {
4627 ep = xfs_iext_get_ext(ifp, lastx - 1); 4469 ep = xfs_iext_get_ext(ifp, lastx - 1);
4628 xfs_bmbt_get_all(ep, &prev); 4470 xfs_bmbt_get_all(ep, &prev);
4629 } 4471 }
4630 } else if (wasdelay) {
4631 alen = (xfs_extlen_t)
4632 XFS_FILBLKS_MIN(len,
4633 (got.br_startoff +
4634 got.br_blockcount) - bno);
4635 aoff = bno;
4636 } else { 4472 } else {
4637 alen = (xfs_extlen_t) 4473 alen = (xfs_extlen_t)
4638 XFS_FILBLKS_MIN(len, MAXEXTLEN); 4474 XFS_FILBLKS_MIN(len, MAXEXTLEN);
@@ -4694,13 +4530,13 @@ xfs_bmapi(
4694 -((int64_t)extsz), (flags & 4530 -((int64_t)extsz), (flags &
4695 XFS_BMAPI_RSVBLOCKS)); 4531 XFS_BMAPI_RSVBLOCKS));
4696 } else { 4532 } else {
4697 error = xfs_mod_incore_sb(mp, 4533 error = xfs_icsb_modify_counters(mp,
4698 XFS_SBS_FDBLOCKS, 4534 XFS_SBS_FDBLOCKS,
4699 -((int64_t)alen), (flags & 4535 -((int64_t)alen), (flags &
4700 XFS_BMAPI_RSVBLOCKS)); 4536 XFS_BMAPI_RSVBLOCKS));
4701 } 4537 }
4702 if (!error) { 4538 if (!error) {
4703 error = xfs_mod_incore_sb(mp, 4539 error = xfs_icsb_modify_counters(mp,
4704 XFS_SBS_FDBLOCKS, 4540 XFS_SBS_FDBLOCKS,
4705 -((int64_t)indlen), (flags & 4541 -((int64_t)indlen), (flags &
4706 XFS_BMAPI_RSVBLOCKS)); 4542 XFS_BMAPI_RSVBLOCKS));
@@ -4710,7 +4546,7 @@ xfs_bmapi(
4710 (int64_t)extsz, (flags & 4546 (int64_t)extsz, (flags &
4711 XFS_BMAPI_RSVBLOCKS)); 4547 XFS_BMAPI_RSVBLOCKS));
4712 else if (error) 4548 else if (error)
4713 xfs_mod_incore_sb(mp, 4549 xfs_icsb_modify_counters(mp,
4714 XFS_SBS_FDBLOCKS, 4550 XFS_SBS_FDBLOCKS,
4715 (int64_t)alen, (flags & 4551 (int64_t)alen, (flags &
4716 XFS_BMAPI_RSVBLOCKS)); 4552 XFS_BMAPI_RSVBLOCKS));
@@ -4831,7 +4667,7 @@ xfs_bmapi(
4831 got.br_state = XFS_EXT_UNWRITTEN; 4667 got.br_state = XFS_EXT_UNWRITTEN;
4832 } 4668 }
4833 error = xfs_bmap_add_extent(ip, lastx, &cur, &got, 4669 error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
4834 firstblock, flist, &tmp_logflags, delta, 4670 firstblock, flist, &tmp_logflags,
4835 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4671 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4836 logflags |= tmp_logflags; 4672 logflags |= tmp_logflags;
4837 if (error) 4673 if (error)
@@ -4912,8 +4748,12 @@ xfs_bmapi(
4912 * Check if writing previously allocated but 4748 * Check if writing previously allocated but
4913 * unwritten extents. 4749 * unwritten extents.
4914 */ 4750 */
4915 if (wr && mval->br_state == XFS_EXT_UNWRITTEN && 4751 if (wr &&
4916 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { 4752 ((mval->br_state == XFS_EXT_UNWRITTEN &&
4753 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
4754 (mval->br_state == XFS_EXT_NORM &&
4755 ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
4756 (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
4917 /* 4757 /*
4918 * Modify (by adding) the state flag, if writing. 4758 * Modify (by adding) the state flag, if writing.
4919 */ 4759 */
@@ -4925,9 +4765,11 @@ xfs_bmapi(
4925 *firstblock; 4765 *firstblock;
4926 cur->bc_private.b.flist = flist; 4766 cur->bc_private.b.flist = flist;
4927 } 4767 }
4928 mval->br_state = XFS_EXT_NORM; 4768 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4769 ? XFS_EXT_NORM
4770 : XFS_EXT_UNWRITTEN;
4929 error = xfs_bmap_add_extent(ip, lastx, &cur, mval, 4771 error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
4930 firstblock, flist, &tmp_logflags, delta, 4772 firstblock, flist, &tmp_logflags,
4931 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4773 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
4932 logflags |= tmp_logflags; 4774 logflags |= tmp_logflags;
4933 if (error) 4775 if (error)
@@ -5017,14 +4859,6 @@ xfs_bmapi(
5017 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4859 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5018 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); 4860 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5019 error = 0; 4861 error = 0;
5020 if (delta && delta->xed_startoff != NULLFILEOFF) {
5021 /* A change was actually made.
5022 * Note that delta->xed_blockount is an offset at this
5023 * point and needs to be converted to a block count.
5024 */
5025 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5026 delta->xed_blockcount -= delta->xed_startoff;
5027 }
5028error0: 4862error0:
5029 /* 4863 /*
5030 * Log everything. Do this after conversion, there's no point in 4864 * Log everything. Do this after conversion, there's no point in
@@ -5136,8 +4970,6 @@ xfs_bunmapi(
5136 xfs_fsblock_t *firstblock, /* first allocated block 4970 xfs_fsblock_t *firstblock, /* first allocated block
5137 controls a.g. for allocs */ 4971 controls a.g. for allocs */
5138 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4972 xfs_bmap_free_t *flist, /* i/o: list extents to free */
5139 xfs_extdelta_t *delta, /* o: change made to incore
5140 extents */
5141 int *done) /* set if not done yet */ 4973 int *done) /* set if not done yet */
5142{ 4974{
5143 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4975 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5196,10 +5028,7 @@ xfs_bunmapi(
5196 bno = start + len - 1; 5028 bno = start + len - 1;
5197 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5029 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5198 &prev); 5030 &prev);
5199 if (delta) { 5031
5200 delta->xed_startoff = NULLFILEOFF;
5201 delta->xed_blockcount = 0;
5202 }
5203 /* 5032 /*
5204 * Check to see if the given block number is past the end of the 5033 * Check to see if the given block number is past the end of the
5205 * file, back up to the last block if so... 5034 * file, back up to the last block if so...
@@ -5297,7 +5126,7 @@ xfs_bunmapi(
5297 } 5126 }
5298 del.br_state = XFS_EXT_UNWRITTEN; 5127 del.br_state = XFS_EXT_UNWRITTEN;
5299 error = xfs_bmap_add_extent(ip, lastx, &cur, &del, 5128 error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
5300 firstblock, flist, &logflags, delta, 5129 firstblock, flist, &logflags,
5301 XFS_DATA_FORK, 0); 5130 XFS_DATA_FORK, 0);
5302 if (error) 5131 if (error)
5303 goto error0; 5132 goto error0;
@@ -5352,7 +5181,7 @@ xfs_bunmapi(
5352 prev.br_state = XFS_EXT_UNWRITTEN; 5181 prev.br_state = XFS_EXT_UNWRITTEN;
5353 error = xfs_bmap_add_extent(ip, lastx - 1, &cur, 5182 error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
5354 &prev, firstblock, flist, &logflags, 5183 &prev, firstblock, flist, &logflags,
5355 delta, XFS_DATA_FORK, 0); 5184 XFS_DATA_FORK, 0);
5356 if (error) 5185 if (error)
5357 goto error0; 5186 goto error0;
5358 goto nodelete; 5187 goto nodelete;
@@ -5361,7 +5190,7 @@ xfs_bunmapi(
5361 del.br_state = XFS_EXT_UNWRITTEN; 5190 del.br_state = XFS_EXT_UNWRITTEN;
5362 error = xfs_bmap_add_extent(ip, lastx, &cur, 5191 error = xfs_bmap_add_extent(ip, lastx, &cur,
5363 &del, firstblock, flist, &logflags, 5192 &del, firstblock, flist, &logflags,
5364 delta, XFS_DATA_FORK, 0); 5193 XFS_DATA_FORK, 0);
5365 if (error) 5194 if (error)
5366 goto error0; 5195 goto error0;
5367 goto nodelete; 5196 goto nodelete;
@@ -5381,7 +5210,7 @@ xfs_bunmapi(
5381 ip, -((long)del.br_blockcount), 0, 5210 ip, -((long)del.br_blockcount), 0,
5382 XFS_QMOPT_RES_RTBLKS); 5211 XFS_QMOPT_RES_RTBLKS);
5383 } else { 5212 } else {
5384 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5213 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
5385 (int64_t)del.br_blockcount, rsvd); 5214 (int64_t)del.br_blockcount, rsvd);
5386 (void)xfs_trans_reserve_quota_nblks(NULL, 5215 (void)xfs_trans_reserve_quota_nblks(NULL,
5387 ip, -((long)del.br_blockcount), 0, 5216 ip, -((long)del.br_blockcount), 0,
@@ -5414,7 +5243,7 @@ xfs_bunmapi(
5414 goto error0; 5243 goto error0;
5415 } 5244 }
5416 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, 5245 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
5417 &tmp_logflags, delta, whichfork, rsvd); 5246 &tmp_logflags, whichfork, rsvd);
5418 logflags |= tmp_logflags; 5247 logflags |= tmp_logflags;
5419 if (error) 5248 if (error)
5420 goto error0; 5249 goto error0;
@@ -5471,14 +5300,6 @@ nodelete:
5471 ASSERT(ifp->if_ext_max == 5300 ASSERT(ifp->if_ext_max ==
5472 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 5301 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5473 error = 0; 5302 error = 0;
5474 if (delta && delta->xed_startoff != NULLFILEOFF) {
5475 /* A change was actually made.
5476 * Note that delta->xed_blockount is an offset at this
5477 * point and needs to be converted to a block count.
5478 */
5479 ASSERT(delta->xed_blockcount > delta->xed_startoff);
5480 delta->xed_blockcount -= delta->xed_startoff;
5481 }
5482error0: 5303error0:
5483 /* 5304 /*
5484 * Log everything. Do this after conversion, there's no point in 5305 * Log everything. Do this after conversion, there's no point in
@@ -5605,28 +5426,6 @@ xfs_getbmap(
5605 prealloced = 0; 5426 prealloced = 0;
5606 fixlen = 1LL << 32; 5427 fixlen = 1LL << 32;
5607 } else { 5428 } else {
5608 /*
5609 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
5610 * not generate a DMAPI read event. Otherwise, if the
5611 * DM_EVENT_READ bit is set for the file, generate a read
5612 * event in order that the DMAPI application may do its thing
5613 * before we return the extents. Usually this means restoring
5614 * user file data to regions of the file that look like holes.
5615 *
5616 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5617 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5618 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
5619 * could misinterpret holes in a DMAPI file as true holes,
5620 * when in fact they may represent offline user data.
5621 */
5622 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5623 !(iflags & BMV_IF_NO_DMAPI_READ)) {
5624 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
5625 0, 0, 0, NULL);
5626 if (error)
5627 return XFS_ERROR(error);
5628 }
5629
5630 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 5429 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5631 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 5430 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5632 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5431 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
@@ -5713,7 +5512,7 @@ xfs_getbmap(
5713 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 5512 error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5714 XFS_BB_TO_FSB(mp, bmv->bmv_length), 5513 XFS_BB_TO_FSB(mp, bmv->bmv_length),
5715 bmapi_flags, NULL, 0, map, &nmap, 5514 bmapi_flags, NULL, 0, map, &nmap,
5716 NULL, NULL); 5515 NULL);
5717 if (error) 5516 if (error)
5718 goto out_free_map; 5517 goto out_free_map;
5719 ASSERT(nmap <= subnex); 5518 ASSERT(nmap <= subnex);
@@ -5744,12 +5543,24 @@ xfs_getbmap(
5744 map[i].br_startblock)) 5543 map[i].br_startblock))
5745 goto out_free_map; 5544 goto out_free_map;
5746 5545
5747 nexleft--;
5748 bmv->bmv_offset = 5546 bmv->bmv_offset =
5749 out[cur_ext].bmv_offset + 5547 out[cur_ext].bmv_offset +
5750 out[cur_ext].bmv_length; 5548 out[cur_ext].bmv_length;
5751 bmv->bmv_length = 5549 bmv->bmv_length =
5752 max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 5550 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
5551
5552 /*
5553 * In case we don't want to return the hole,
5554 * don't increase cur_ext so that we can reuse
5555 * it in the next loop.
5556 */
5557 if ((iflags & BMV_IF_NO_HOLES) &&
5558 map[i].br_startblock == HOLESTARTBLOCK) {
5559 memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
5560 continue;
5561 }
5562
5563 nexleft--;
5753 bmv->bmv_entries++; 5564 bmv->bmv_entries++;
5754 cur_ext++; 5565 cur_ext++;
5755 } 5566 }
@@ -5859,66 +5670,34 @@ xfs_bmap_eof(
5859} 5670}
5860 5671
5861#ifdef DEBUG 5672#ifdef DEBUG
5862STATIC 5673STATIC struct xfs_buf *
5863xfs_buf_t *
5864xfs_bmap_get_bp( 5674xfs_bmap_get_bp(
5865 xfs_btree_cur_t *cur, 5675 struct xfs_btree_cur *cur,
5866 xfs_fsblock_t bno) 5676 xfs_fsblock_t bno)
5867{ 5677{
5868 int i; 5678 struct xfs_log_item_desc *lidp;
5869 xfs_buf_t *bp; 5679 int i;
5870 5680
5871 if (!cur) 5681 if (!cur)
5872 return(NULL); 5682 return NULL;
5873
5874 bp = NULL;
5875 for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5876 bp = cur->bc_bufs[i];
5877 if (!bp) break;
5878 if (XFS_BUF_ADDR(bp) == bno)
5879 break; /* Found it */
5880 }
5881 if (i == XFS_BTREE_MAXLEVELS)
5882 bp = NULL;
5883
5884 if (!bp) { /* Chase down all the log items to see if the bp is there */
5885 xfs_log_item_chunk_t *licp;
5886 xfs_trans_t *tp;
5887
5888 tp = cur->bc_tp;
5889 licp = &tp->t_items;
5890 while (!bp && licp != NULL) {
5891 if (xfs_lic_are_all_free(licp)) {
5892 licp = licp->lic_next;
5893 continue;
5894 }
5895 for (i = 0; i < licp->lic_unused; i++) {
5896 xfs_log_item_desc_t *lidp;
5897 xfs_log_item_t *lip;
5898 xfs_buf_log_item_t *bip;
5899 xfs_buf_t *lbp;
5900
5901 if (xfs_lic_isfree(licp, i)) {
5902 continue;
5903 }
5904
5905 lidp = xfs_lic_slot(licp, i);
5906 lip = lidp->lid_item;
5907 if (lip->li_type != XFS_LI_BUF)
5908 continue;
5909 5683
5910 bip = (xfs_buf_log_item_t *)lip; 5684 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5911 lbp = bip->bli_buf; 5685 if (!cur->bc_bufs[i])
5686 break;
5687 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
5688 return cur->bc_bufs[i];
5689 }
5912 5690
5913 if (XFS_BUF_ADDR(lbp) == bno) { 5691 /* Chase down all the log items to see if the bp is there */
5914 bp = lbp; 5692 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
5915 break; /* Found it */ 5693 struct xfs_buf_log_item *bip;
5916 } 5694 bip = (struct xfs_buf_log_item *)lidp->lid_item;
5917 } 5695 if (bip->bli_item.li_type == XFS_LI_BUF &&
5918 licp = licp->lic_next; 5696 XFS_BUF_ADDR(bip->bli_buf) == bno)
5919 } 5697 return bip->bli_buf;
5920 } 5698 }
5921 return(bp); 5699
5700 return NULL;
5922} 5701}
5923 5702
5924STATIC void 5703STATIC void
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 419dafb9d87d..71ec9b6ecdfc 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -28,20 +28,6 @@ struct xfs_trans;
28extern kmem_zone_t *xfs_bmap_free_item_zone; 28extern kmem_zone_t *xfs_bmap_free_item_zone;
29 29
30/* 30/*
31 * DELTA: describe a change to the in-core extent list.
32 *
33 * Internally the use of xed_blockount is somewhat funky.
34 * xed_blockcount contains an offset much of the time because this
35 * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are
36 * the same underlying type).
37 */
38typedef struct xfs_extdelta
39{
40 xfs_fileoff_t xed_startoff; /* offset of range */
41 xfs_filblks_t xed_blockcount; /* blocks in range */
42} xfs_extdelta_t;
43
44/*
45 * List of extents to be free "later". 31 * List of extents to be free "later".
46 * The list is kept sorted on xbf_startblock. 32 * The list is kept sorted on xbf_startblock.
47 */ 33 */
@@ -82,27 +68,25 @@ typedef struct xfs_bmap_free
82#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ 68#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
83#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ 69#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
84#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ 70#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
85#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */ 71#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
86#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */ 72#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */
87#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */ 73#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
88#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */ 74#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
89#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */
90#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
91 /* combine contig. space */ 75 /* combine contig. space */
92#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ 76#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
93/* XFS_BMAPI_DIRECT_IO 0x800 */ 77/*
94#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */ 78 * unwritten extent conversion - this needs write cache flushing and no additional
95 /* need write cache flushing and no */ 79 * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
96 /* additional allocation alignments */ 80 * from written to unwritten, otherwise convert from unwritten to written.
81 */
82#define XFS_BMAPI_CONVERT 0x200
97 83
98#define XFS_BMAPI_FLAGS \ 84#define XFS_BMAPI_FLAGS \
99 { XFS_BMAPI_WRITE, "WRITE" }, \ 85 { XFS_BMAPI_WRITE, "WRITE" }, \
100 { XFS_BMAPI_DELAY, "DELAY" }, \ 86 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 87 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \ 88 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ 89 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ 90 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 91 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 92 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
@@ -310,9 +294,7 @@ xfs_bmapi(
310 xfs_extlen_t total, /* total blocks needed */ 294 xfs_extlen_t total, /* total blocks needed */
311 struct xfs_bmbt_irec *mval, /* output: map values */ 295 struct xfs_bmbt_irec *mval, /* output: map values */
312 int *nmap, /* i/o: mval size/count */ 296 int *nmap, /* i/o: mval size/count */
313 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 297 xfs_bmap_free_t *flist); /* i/o: list extents to free */
314 xfs_extdelta_t *delta); /* o: change made to incore
315 extents */
316 298
317/* 299/*
318 * Map file blocks to filesystem blocks, simple version. 300 * Map file blocks to filesystem blocks, simple version.
@@ -346,8 +328,6 @@ xfs_bunmapi(
346 xfs_fsblock_t *firstblock, /* first allocated block 328 xfs_fsblock_t *firstblock, /* first allocated block
347 controls a.g. for allocs */ 329 controls a.g. for allocs */
348 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 330 xfs_bmap_free_t *flist, /* i/o: list extents to free */
349 xfs_extdelta_t *delta, /* o: change made to incore
350 extents */
351 int *done); /* set if not done yet */ 331 int *done); /* set if not done yet */
352 332
353/* 333/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 416e47e54b83..87d3c10b6954 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,21 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_alloc.h" 34#include "xfs_alloc.h"
39#include "xfs_btree.h" 35#include "xfs_btree.h"
40#include "xfs_btree_trace.h" 36#include "xfs_btree_trace.h"
41#include "xfs_ialloc.h"
42#include "xfs_itable.h" 37#include "xfs_itable.h"
43#include "xfs_bmap.h" 38#include "xfs_bmap.h"
44#include "xfs_error.h" 39#include "xfs_error.h"
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 96be4b0f2496..04f9cca8da7e 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,20 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
39#include "xfs_btree_trace.h" 35#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h"
41#include "xfs_error.h" 36#include "xfs_error.h"
42#include "xfs_trace.h" 37#include "xfs_trace.h"
43 38
@@ -222,7 +217,7 @@ xfs_btree_del_cursor(
222 */ 217 */
223 for (i = 0; i < cur->bc_nlevels; i++) { 218 for (i = 0; i < cur->bc_nlevels; i++) {
224 if (cur->bc_bufs[i]) 219 if (cur->bc_bufs[i])
225 xfs_btree_setbuf(cur, i, NULL); 220 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
226 else if (!error) 221 else if (!error)
227 break; 222 break;
228 } 223 }
@@ -661,7 +656,7 @@ xfs_btree_reada_bufl(
661 656
662 ASSERT(fsbno != NULLFSBLOCK); 657 ASSERT(fsbno != NULLFSBLOCK);
663 d = XFS_FSB_TO_DADDR(mp, fsbno); 658 d = XFS_FSB_TO_DADDR(mp, fsbno);
664 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 659 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
665} 660}
666 661
667/* 662/*
@@ -681,7 +676,7 @@ xfs_btree_reada_bufs(
681 ASSERT(agno != NULLAGNUMBER); 676 ASSERT(agno != NULLAGNUMBER);
682 ASSERT(agbno != NULLAGBLOCK); 677 ASSERT(agbno != NULLAGBLOCK);
683 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 678 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
684 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 679 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
685} 680}
686 681
687STATIC int 682STATIC int
@@ -768,22 +763,19 @@ xfs_btree_readahead(
768 * Set the buffer for level "lev" in the cursor to bp, releasing 763 * Set the buffer for level "lev" in the cursor to bp, releasing
769 * any previous buffer. 764 * any previous buffer.
770 */ 765 */
771void 766STATIC void
772xfs_btree_setbuf( 767xfs_btree_setbuf(
773 xfs_btree_cur_t *cur, /* btree cursor */ 768 xfs_btree_cur_t *cur, /* btree cursor */
774 int lev, /* level in btree */ 769 int lev, /* level in btree */
775 xfs_buf_t *bp) /* new buffer to set */ 770 xfs_buf_t *bp) /* new buffer to set */
776{ 771{
777 struct xfs_btree_block *b; /* btree block */ 772 struct xfs_btree_block *b; /* btree block */
778 xfs_buf_t *obp; /* old buffer pointer */
779 773
780 obp = cur->bc_bufs[lev]; 774 if (cur->bc_bufs[lev])
781 if (obp) 775 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
782 xfs_trans_brelse(cur->bc_tp, obp);
783 cur->bc_bufs[lev] = bp; 776 cur->bc_bufs[lev] = bp;
784 cur->bc_ra[lev] = 0; 777 cur->bc_ra[lev] = 0;
785 if (!bp) 778
786 return;
787 b = XFS_BUF_TO_BLOCK(bp); 779 b = XFS_BUF_TO_BLOCK(bp);
788 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 780 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
789 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 781 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
@@ -3016,6 +3008,43 @@ out0:
3016 return 0; 3008 return 0;
3017} 3009}
3018 3010
3011/*
3012 * Kill the current root node, and replace it with its only child node.
3013 */
3014STATIC int
3015xfs_btree_kill_root(
3016 struct xfs_btree_cur *cur,
3017 struct xfs_buf *bp,
3018 int level,
3019 union xfs_btree_ptr *newroot)
3020{
3021 int error;
3022
3023 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3024 XFS_BTREE_STATS_INC(cur, killroot);
3025
3026 /*
3027 * Update the root pointer, decreasing the level by 1 and then
3028 * free the old root.
3029 */
3030 cur->bc_ops->set_root(cur, newroot, -1);
3031
3032 error = cur->bc_ops->free_block(cur, bp);
3033 if (error) {
3034 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3035 return error;
3036 }
3037
3038 XFS_BTREE_STATS_INC(cur, free);
3039
3040 cur->bc_bufs[level] = NULL;
3041 cur->bc_ra[level] = 0;
3042 cur->bc_nlevels--;
3043
3044 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3045 return 0;
3046}
3047
3019STATIC int 3048STATIC int
3020xfs_btree_dec_cursor( 3049xfs_btree_dec_cursor(
3021 struct xfs_btree_cur *cur, 3050 struct xfs_btree_cur *cur,
@@ -3200,7 +3229,7 @@ xfs_btree_delrec(
3200 * Make it the new root of the btree. 3229 * Make it the new root of the btree.
3201 */ 3230 */
3202 pp = xfs_btree_ptr_addr(cur, 1, block); 3231 pp = xfs_btree_ptr_addr(cur, 1, block);
3203 error = cur->bc_ops->kill_root(cur, bp, level, pp); 3232 error = xfs_btree_kill_root(cur, bp, level, pp);
3204 if (error) 3233 if (error)
3205 goto error0; 3234 goto error0;
3206 } else if (level > 0) { 3235 } else if (level > 0) {
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7fa07062bdda..82fafc66bd1f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -152,9 +152,7 @@ struct xfs_btree_ops {
152 152
153 /* update btree root pointer */ 153 /* update btree root pointer */
154 void (*set_root)(struct xfs_btree_cur *cur, 154 void (*set_root)(struct xfs_btree_cur *cur,
155 union xfs_btree_ptr *nptr, int level_change); 155 union xfs_btree_ptr *nptr, int level_change);
156 int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
157 int level, union xfs_btree_ptr *newroot);
158 156
159 /* block allocation / freeing */ 157 /* block allocation / freeing */
160 int (*alloc_block)(struct xfs_btree_cur *cur, 158 int (*alloc_block)(struct xfs_btree_cur *cur,
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs(
399 xfs_agblock_t agbno, /* allocation group block number */ 397 xfs_agblock_t agbno, /* allocation group block number */
400 xfs_extlen_t count); /* count of filesystem blocks */ 398 xfs_extlen_t count); /* count of filesystem blocks */
401 399
402/*
403 * Set the buffer for level "lev" in the cursor to bp, releasing
404 * any previous buffer.
405 */
406void
407xfs_btree_setbuf(
408 xfs_btree_cur_t *cur, /* btree cursor */
409 int lev, /* level in btree */
410 struct xfs_buf *bp); /* new buffer to set */
411
412 400
413/* 401/*
414 * Common btree core entry points. 402 * Common btree core entry points.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 02a80984aa05..2686d0d54c5b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_buf_item.h" 28#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
@@ -34,6 +33,12 @@
34 33
35kmem_zone_t *xfs_buf_item_zone; 34kmem_zone_t *xfs_buf_item_zone;
36 35
36static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37{
38 return container_of(lip, struct xfs_buf_log_item, bli_item);
39}
40
41
37#ifdef XFS_TRANS_DEBUG 42#ifdef XFS_TRANS_DEBUG
38/* 43/*
39 * This function uses an alternate strategy for tracking the bytes 44 * This function uses an alternate strategy for tracking the bytes
@@ -151,12 +156,13 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
151 */ 156 */
152STATIC uint 157STATIC uint
153xfs_buf_item_size( 158xfs_buf_item_size(
154 xfs_buf_log_item_t *bip) 159 struct xfs_log_item *lip)
155{ 160{
156 uint nvecs; 161 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
157 int next_bit; 162 struct xfs_buf *bp = bip->bli_buf;
158 int last_bit; 163 uint nvecs;
159 xfs_buf_t *bp; 164 int next_bit;
165 int last_bit;
160 166
161 ASSERT(atomic_read(&bip->bli_refcount) > 0); 167 ASSERT(atomic_read(&bip->bli_refcount) > 0);
162 if (bip->bli_flags & XFS_BLI_STALE) { 168 if (bip->bli_flags & XFS_BLI_STALE) {
@@ -170,7 +176,6 @@ xfs_buf_item_size(
170 return 1; 176 return 1;
171 } 177 }
172 178
173 bp = bip->bli_buf;
174 ASSERT(bip->bli_flags & XFS_BLI_LOGGED); 179 ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
175 nvecs = 1; 180 nvecs = 1;
176 last_bit = xfs_next_bit(bip->bli_format.blf_data_map, 181 last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
@@ -219,13 +224,13 @@ xfs_buf_item_size(
219 */ 224 */
220STATIC void 225STATIC void
221xfs_buf_item_format( 226xfs_buf_item_format(
222 xfs_buf_log_item_t *bip, 227 struct xfs_log_item *lip,
223 xfs_log_iovec_t *log_vector) 228 struct xfs_log_iovec *vecp)
224{ 229{
230 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
231 struct xfs_buf *bp = bip->bli_buf;
225 uint base_size; 232 uint base_size;
226 uint nvecs; 233 uint nvecs;
227 xfs_log_iovec_t *vecp;
228 xfs_buf_t *bp;
229 int first_bit; 234 int first_bit;
230 int last_bit; 235 int last_bit;
231 int next_bit; 236 int next_bit;
@@ -235,8 +240,6 @@ xfs_buf_item_format(
235 ASSERT(atomic_read(&bip->bli_refcount) > 0); 240 ASSERT(atomic_read(&bip->bli_refcount) > 0);
236 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 241 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
237 (bip->bli_flags & XFS_BLI_STALE)); 242 (bip->bli_flags & XFS_BLI_STALE));
238 bp = bip->bli_buf;
239 vecp = log_vector;
240 243
241 /* 244 /*
242 * The size of the base structure is the size of the 245 * The size of the base structure is the size of the
@@ -248,7 +251,7 @@ xfs_buf_item_format(
248 base_size = 251 base_size =
249 (uint)(sizeof(xfs_buf_log_format_t) + 252 (uint)(sizeof(xfs_buf_log_format_t) +
250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 253 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 254 vecp->i_addr = &bip->bli_format;
252 vecp->i_len = base_size; 255 vecp->i_len = base_size;
253 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 256 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
254 vecp++; 257 vecp++;
@@ -263,7 +266,7 @@ xfs_buf_item_format(
263 */ 266 */
264 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 267 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
265 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 268 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
266 xfs_log_item_in_current_chkpt(&bip->bli_item))) 269 xfs_log_item_in_current_chkpt(lip)))
267 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 270 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
268 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 271 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
269 } 272 }
@@ -356,66 +359,90 @@ xfs_buf_item_format(
356 359
357/* 360/*
358 * This is called to pin the buffer associated with the buf log item in memory 361 * This is called to pin the buffer associated with the buf log item in memory
359 * so it cannot be written out. Simply call bpin() on the buffer to do this. 362 * so it cannot be written out.
360 * 363 *
361 * We also always take a reference to the buffer log item here so that the bli 364 * We also always take a reference to the buffer log item here so that the bli
362 * is held while the item is pinned in memory. This means that we can 365 * is held while the item is pinned in memory. This means that we can
363 * unconditionally drop the reference count a transaction holds when the 366 * unconditionally drop the reference count a transaction holds when the
364 * transaction is completed. 367 * transaction is completed.
365 */ 368 */
366
367STATIC void 369STATIC void
368xfs_buf_item_pin( 370xfs_buf_item_pin(
369 xfs_buf_log_item_t *bip) 371 struct xfs_log_item *lip)
370{ 372{
371 xfs_buf_t *bp; 373 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
372 374
373 bp = bip->bli_buf; 375 ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
374 ASSERT(XFS_BUF_ISBUSY(bp));
375 ASSERT(atomic_read(&bip->bli_refcount) > 0); 376 ASSERT(atomic_read(&bip->bli_refcount) > 0);
376 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 377 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
377 (bip->bli_flags & XFS_BLI_STALE)); 378 (bip->bli_flags & XFS_BLI_STALE));
378 atomic_inc(&bip->bli_refcount); 379
379 trace_xfs_buf_item_pin(bip); 380 trace_xfs_buf_item_pin(bip);
380 xfs_bpin(bp);
381}
382 381
382 atomic_inc(&bip->bli_refcount);
383 atomic_inc(&bip->bli_buf->b_pin_count);
384}
383 385
384/* 386/*
385 * This is called to unpin the buffer associated with the buf log 387 * This is called to unpin the buffer associated with the buf log
386 * item which was previously pinned with a call to xfs_buf_item_pin(). 388 * item which was previously pinned with a call to xfs_buf_item_pin().
387 * Just call bunpin() on the buffer to do this.
388 * 389 *
389 * Also drop the reference to the buf item for the current transaction. 390 * Also drop the reference to the buf item for the current transaction.
390 * If the XFS_BLI_STALE flag is set and we are the last reference, 391 * If the XFS_BLI_STALE flag is set and we are the last reference,
391 * then free up the buf log item and unlock the buffer. 392 * then free up the buf log item and unlock the buffer.
393 *
394 * If the remove flag is set we are called from uncommit in the
395 * forced-shutdown path. If that is true and the reference count on
396 * the log item is going to drop to zero we need to free the item's
397 * descriptor in the transaction.
392 */ 398 */
393STATIC void 399STATIC void
394xfs_buf_item_unpin( 400xfs_buf_item_unpin(
395 xfs_buf_log_item_t *bip) 401 struct xfs_log_item *lip,
402 int remove)
396{ 403{
397 struct xfs_ail *ailp; 404 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
398 xfs_buf_t *bp; 405 xfs_buf_t *bp = bip->bli_buf;
399 int freed; 406 struct xfs_ail *ailp = lip->li_ailp;
400 int stale = bip->bli_flags & XFS_BLI_STALE; 407 int stale = bip->bli_flags & XFS_BLI_STALE;
408 int freed;
401 409
402 bp = bip->bli_buf;
403 ASSERT(bp != NULL);
404 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 410 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
405 ASSERT(atomic_read(&bip->bli_refcount) > 0); 411 ASSERT(atomic_read(&bip->bli_refcount) > 0);
412
406 trace_xfs_buf_item_unpin(bip); 413 trace_xfs_buf_item_unpin(bip);
407 414
408 freed = atomic_dec_and_test(&bip->bli_refcount); 415 freed = atomic_dec_and_test(&bip->bli_refcount);
409 ailp = bip->bli_item.li_ailp; 416
410 xfs_bunpin(bp); 417 if (atomic_dec_and_test(&bp->b_pin_count))
418 wake_up_all(&bp->b_waiters);
419
411 if (freed && stale) { 420 if (freed && stale) {
412 ASSERT(bip->bli_flags & XFS_BLI_STALE); 421 ASSERT(bip->bli_flags & XFS_BLI_STALE);
413 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 422 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
414 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 423 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
415 ASSERT(XFS_BUF_ISSTALE(bp)); 424 ASSERT(XFS_BUF_ISSTALE(bp));
416 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 425 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
426
417 trace_xfs_buf_item_unpin_stale(bip); 427 trace_xfs_buf_item_unpin_stale(bip);
418 428
429 if (remove) {
430 /*
431 * We have to remove the log item from the transaction
432 * as we are about to release our reference to the
433 * buffer. If we don't, the unlock that occurs later
434 * in xfs_trans_uncommit() will try to reference the
435 * buffer which we no longer have a hold on.
436 */
437 xfs_trans_del_item(lip);
438
439 /*
440 * Since the transaction no longer refers to the buffer,
441 * the buffer should no longer refer to the transaction.
442 */
443 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
444 }
445
419 /* 446 /*
420 * If we get called here because of an IO error, we may 447 * If we get called here because of an IO error, we may
421 * or may not have the item on the AIL. xfs_trans_ail_delete() 448 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -437,48 +464,6 @@ xfs_buf_item_unpin(
437} 464}
438 465
439/* 466/*
440 * this is called from uncommit in the forced-shutdown path.
441 * we need to check to see if the reference count on the log item
442 * is going to drop to zero. If so, unpin will free the log item
443 * so we need to free the item's descriptor (that points to the item)
444 * in the transaction.
445 */
446STATIC void
447xfs_buf_item_unpin_remove(
448 xfs_buf_log_item_t *bip,
449 xfs_trans_t *tp)
450{
451 /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
452 if ((atomic_read(&bip->bli_refcount) == 1) &&
453 (bip->bli_flags & XFS_BLI_STALE)) {
454 /*
455 * yes -- We can safely do some work here and then call
456 * buf_item_unpin to do the rest because we are
457 * are holding the buffer locked so no one else will be
458 * able to bump up the refcount. We have to remove the
459 * log item from the transaction as we are about to release
460 * our reference to the buffer. If we don't, the unlock that
461 * occurs later in the xfs_trans_uncommit() will try to
462 * reference the buffer which we no longer have a hold on.
463 */
464 struct xfs_log_item_desc *lidp;
465
466 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
467 trace_xfs_buf_item_unpin_stale(bip);
468
469 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
470 xfs_trans_free_item(tp, lidp);
471
472 /*
473 * Since the transaction no longer refers to the buffer, the
474 * buffer should no longer refer to the transaction.
475 */
476 XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
477 }
478 xfs_buf_item_unpin(bip);
479}
480
481/*
482 * This is called to attempt to lock the buffer associated with this 467 * This is called to attempt to lock the buffer associated with this
483 * buf log item. Don't sleep on the buffer lock. If we can't get 468 * buf log item. Don't sleep on the buffer lock. If we can't get
484 * the lock right away, return 0. If we can get the lock, take a 469 * the lock right away, return 0. If we can get the lock, take a
@@ -488,11 +473,11 @@ xfs_buf_item_unpin_remove(
488 */ 473 */
489STATIC uint 474STATIC uint
490xfs_buf_item_trylock( 475xfs_buf_item_trylock(
491 xfs_buf_log_item_t *bip) 476 struct xfs_log_item *lip)
492{ 477{
493 xfs_buf_t *bp; 478 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
479 struct xfs_buf *bp = bip->bli_buf;
494 480
495 bp = bip->bli_buf;
496 if (XFS_BUF_ISPINNED(bp)) 481 if (XFS_BUF_ISPINNED(bp))
497 return XFS_ITEM_PINNED; 482 return XFS_ITEM_PINNED;
498 if (!XFS_BUF_CPSEMA(bp)) 483 if (!XFS_BUF_CPSEMA(bp))
@@ -529,13 +514,12 @@ xfs_buf_item_trylock(
529 */ 514 */
530STATIC void 515STATIC void
531xfs_buf_item_unlock( 516xfs_buf_item_unlock(
532 xfs_buf_log_item_t *bip) 517 struct xfs_log_item *lip)
533{ 518{
534 int aborted; 519 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
535 xfs_buf_t *bp; 520 struct xfs_buf *bp = bip->bli_buf;
536 uint hold; 521 int aborted;
537 522 uint hold;
538 bp = bip->bli_buf;
539 523
540 /* Clear the buffer's association with this transaction. */ 524 /* Clear the buffer's association with this transaction. */
541 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 525 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
@@ -546,7 +530,7 @@ xfs_buf_item_unlock(
546 * (cancelled) buffers at unpin time, but we'll never go through the 530 * (cancelled) buffers at unpin time, but we'll never go through the
547 * pin/unpin cycle if we abort inside commit. 531 * pin/unpin cycle if we abort inside commit.
548 */ 532 */
549 aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; 533 aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
550 534
551 /* 535 /*
552 * Before possibly freeing the buf item, determine if we should 536 * Before possibly freeing the buf item, determine if we should
@@ -607,16 +591,16 @@ xfs_buf_item_unlock(
607 */ 591 */
608STATIC xfs_lsn_t 592STATIC xfs_lsn_t
609xfs_buf_item_committed( 593xfs_buf_item_committed(
610 xfs_buf_log_item_t *bip, 594 struct xfs_log_item *lip,
611 xfs_lsn_t lsn) 595 xfs_lsn_t lsn)
612{ 596{
597 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
598
613 trace_xfs_buf_item_committed(bip); 599 trace_xfs_buf_item_committed(bip);
614 600
615 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 601 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
616 (bip->bli_item.li_lsn != 0)) { 602 return lip->li_lsn;
617 return bip->bli_item.li_lsn; 603 return lsn;
618 }
619 return (lsn);
620} 604}
621 605
622/* 606/*
@@ -626,15 +610,16 @@ xfs_buf_item_committed(
626 */ 610 */
627STATIC void 611STATIC void
628xfs_buf_item_push( 612xfs_buf_item_push(
629 xfs_buf_log_item_t *bip) 613 struct xfs_log_item *lip)
630{ 614{
631 xfs_buf_t *bp; 615 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
616 struct xfs_buf *bp = bip->bli_buf;
632 617
633 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 618 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
619 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
620
634 trace_xfs_buf_item_push(bip); 621 trace_xfs_buf_item_push(bip);
635 622
636 bp = bip->bli_buf;
637 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
638 xfs_buf_relse(bp); 623 xfs_buf_relse(bp);
639} 624}
640 625
@@ -646,22 +631,24 @@ xfs_buf_item_push(
646 */ 631 */
647STATIC void 632STATIC void
648xfs_buf_item_pushbuf( 633xfs_buf_item_pushbuf(
649 xfs_buf_log_item_t *bip) 634 struct xfs_log_item *lip)
650{ 635{
651 xfs_buf_t *bp; 636 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
637 struct xfs_buf *bp = bip->bli_buf;
652 638
653 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 639 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
640 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
641
654 trace_xfs_buf_item_pushbuf(bip); 642 trace_xfs_buf_item_pushbuf(bip);
655 643
656 bp = bip->bli_buf;
657 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
658 xfs_buf_delwri_promote(bp); 644 xfs_buf_delwri_promote(bp);
659 xfs_buf_relse(bp); 645 xfs_buf_relse(bp);
660} 646}
661 647
662/* ARGSUSED */
663STATIC void 648STATIC void
664xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) 649xfs_buf_item_committing(
650 struct xfs_log_item *lip,
651 xfs_lsn_t commit_lsn)
665{ 652{
666} 653}
667 654
@@ -669,21 +656,16 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
669 * This is the ops vector shared by all buf log items. 656 * This is the ops vector shared by all buf log items.
670 */ 657 */
671static struct xfs_item_ops xfs_buf_item_ops = { 658static struct xfs_item_ops xfs_buf_item_ops = {
672 .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, 659 .iop_size = xfs_buf_item_size,
673 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 660 .iop_format = xfs_buf_item_format,
674 xfs_buf_item_format, 661 .iop_pin = xfs_buf_item_pin,
675 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, 662 .iop_unpin = xfs_buf_item_unpin,
676 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin, 663 .iop_trylock = xfs_buf_item_trylock,
677 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 664 .iop_unlock = xfs_buf_item_unlock,
678 xfs_buf_item_unpin_remove, 665 .iop_committed = xfs_buf_item_committed,
679 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, 666 .iop_push = xfs_buf_item_push,
680 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, 667 .iop_pushbuf = xfs_buf_item_pushbuf,
681 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 668 .iop_committing = xfs_buf_item_committing
682 xfs_buf_item_committed,
683 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
684 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
685 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
686 xfs_buf_item_committing
687}; 669};
688 670
689 671
@@ -710,9 +692,7 @@ xfs_buf_item_init(
710 * the first. If we do already have one, there is 692 * the first. If we do already have one, there is
711 * nothing to do here so return. 693 * nothing to do here so return.
712 */ 694 */
713 if (bp->b_mount != mp) 695 ASSERT(bp->b_target->bt_mount == mp);
714 bp->b_mount = mp;
715 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
716 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 696 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
717 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 697 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
718 if (lip->li_type == XFS_LI_BUF) { 698 if (lip->li_type == XFS_LI_BUF) {
@@ -993,7 +973,7 @@ xfs_buf_iodone_callbacks(
993 xfs_buf_do_callbacks(bp, lip); 973 xfs_buf_do_callbacks(bp, lip);
994 XFS_BUF_SET_FSPRIVATE(bp, NULL); 974 XFS_BUF_SET_FSPRIVATE(bp, NULL);
995 XFS_BUF_CLR_IODONE_FUNC(bp); 975 XFS_BUF_CLR_IODONE_FUNC(bp);
996 xfs_biodone(bp); 976 xfs_buf_ioend(bp, 0);
997 return; 977 return;
998 } 978 }
999 979
@@ -1052,7 +1032,7 @@ xfs_buf_iodone_callbacks(
1052 xfs_buf_do_callbacks(bp, lip); 1032 xfs_buf_do_callbacks(bp, lip);
1053 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1033 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1054 XFS_BUF_CLR_IODONE_FUNC(bp); 1034 XFS_BUF_CLR_IODONE_FUNC(bp);
1055 xfs_biodone(bp); 1035 xfs_buf_ioend(bp, 0);
1056} 1036}
1057 1037
1058/* 1038/*
@@ -1098,15 +1078,14 @@ xfs_buf_error_relse(
1098 * It is called by xfs_buf_iodone_callbacks() above which will take 1078 * It is called by xfs_buf_iodone_callbacks() above which will take
1099 * care of cleaning up the buffer itself. 1079 * care of cleaning up the buffer itself.
1100 */ 1080 */
1101/* ARGSUSED */
1102void 1081void
1103xfs_buf_iodone( 1082xfs_buf_iodone(
1104 xfs_buf_t *bp, 1083 struct xfs_buf *bp,
1105 xfs_buf_log_item_t *bip) 1084 struct xfs_log_item *lip)
1106{ 1085{
1107 struct xfs_ail *ailp = bip->bli_item.li_ailp; 1086 struct xfs_ail *ailp = lip->li_ailp;
1108 1087
1109 ASSERT(bip->bli_buf == bp); 1088 ASSERT(BUF_ITEM(lip)->bli_buf == bp);
1110 1089
1111 xfs_buf_rele(bp); 1090 xfs_buf_rele(bp);
1112 1091
@@ -1120,6 +1099,6 @@ xfs_buf_iodone(
1120 * Either way, AIL is useless if we're forcing a shutdown. 1099 * Either way, AIL is useless if we're forcing a shutdown.
1121 */ 1100 */
1122 spin_lock(&ailp->xa_lock); 1101 spin_lock(&ailp->xa_lock);
1123 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1102 xfs_trans_ail_delete(ailp, lip);
1124 xfs_buf_item_free(bip); 1103 xfs_buf_item_free(BUF_ITEM(lip));
1125} 1104}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index f20bb472d582..0e2ed43f16c7 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -124,7 +124,7 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
124 void(*)(struct xfs_buf *, xfs_log_item_t *), 124 void(*)(struct xfs_buf *, xfs_log_item_t *),
125 xfs_log_item_t *); 125 xfs_log_item_t *);
126void xfs_buf_iodone_callbacks(struct xfs_buf *); 126void xfs_buf_iodone_callbacks(struct xfs_buf *);
127void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *); 127void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
128 128
129#ifdef XFS_TRANS_DEBUG 129#ifdef XFS_TRANS_DEBUG
130void 130void
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0ca556b4bf31..1c00bedb3175 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -25,19 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
39#include "xfs_alloc.h" 35#include "xfs_alloc.h"
40#include "xfs_btree.h"
41#include "xfs_bmap.h" 36#include "xfs_bmap.h"
42#include "xfs_attr.h" 37#include "xfs_attr.h"
43#include "xfs_attr_leaf.h" 38#include "xfs_attr_leaf.h"
@@ -581,16 +576,14 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
581 xfs_da_intnode_t *node; 576 xfs_da_intnode_t *node;
582 xfs_da_node_entry_t *btree; 577 xfs_da_node_entry_t *btree;
583 int tmp; 578 int tmp;
584 xfs_mount_t *mp;
585 579
586 node = oldblk->bp->data; 580 node = oldblk->bp->data;
587 mp = state->mp;
588 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 581 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
589 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 582 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
590 ASSERT(newblk->blkno != 0); 583 ASSERT(newblk->blkno != 0);
591 if (state->args->whichfork == XFS_DATA_FORK) 584 if (state->args->whichfork == XFS_DATA_FORK)
592 ASSERT(newblk->blkno >= mp->m_dirleafblk && 585 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
593 newblk->blkno < mp->m_dirfreeblk); 586 newblk->blkno < state->mp->m_dirfreeblk);
594 587
595 /* 588 /*
596 * We may need to make some room before we insert the new node. 589 * We may need to make some room before we insert the new node.
@@ -1601,7 +1594,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1601 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1594 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1602 XFS_BMAPI_CONTIG, 1595 XFS_BMAPI_CONTIG,
1603 args->firstblock, args->total, &map, &nmap, 1596 args->firstblock, args->total, &map, &nmap,
1604 args->flist, NULL))) { 1597 args->flist))) {
1605 return error; 1598 return error;
1606 } 1599 }
1607 ASSERT(nmap <= 1); 1600 ASSERT(nmap <= 1);
@@ -1622,8 +1615,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1622 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| 1615 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1623 XFS_BMAPI_METADATA, 1616 XFS_BMAPI_METADATA,
1624 args->firstblock, args->total, 1617 args->firstblock, args->total,
1625 &mapp[mapi], &nmap, args->flist, 1618 &mapp[mapi], &nmap, args->flist))) {
1626 NULL))) {
1627 kmem_free(mapp); 1619 kmem_free(mapp);
1628 return error; 1620 return error;
1629 } 1621 }
@@ -1884,7 +1876,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1884 */ 1876 */
1885 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 1877 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1886 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 1878 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1887 0, args->firstblock, args->flist, NULL, 1879 0, args->firstblock, args->flist,
1888 &done)) == ENOSPC) { 1880 &done)) == ENOSPC) {
1889 if (w != XFS_DATA_FORK) 1881 if (w != XFS_DATA_FORK)
1890 break; 1882 break;
@@ -1989,7 +1981,7 @@ xfs_da_do_buf(
1989 nfsb, 1981 nfsb,
1990 XFS_BMAPI_METADATA | 1982 XFS_BMAPI_METADATA |
1991 xfs_bmapi_aflag(whichfork), 1983 xfs_bmapi_aflag(whichfork),
1992 NULL, 0, mapp, &nmap, NULL, NULL))) 1984 NULL, 0, mapp, &nmap, NULL)))
1993 goto exit0; 1985 goto exit0;
1994 } 1986 }
1995 } else { 1987 } else {
@@ -2050,7 +2042,7 @@ xfs_da_do_buf(
2050 mappedbno, nmapped, 0, &bp); 2042 mappedbno, nmapped, 0, &bp);
2051 break; 2043 break;
2052 case 3: 2044 case 3:
2053 xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); 2045 xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
2054 error = 0; 2046 error = 0;
2055 bp = NULL; 2047 bp = NULL;
2056 break; 2048 break;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 7f159d2a429a..3b9582c60a22 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -24,24 +24,15 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
38#include "xfs_bmap.h" 32#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_itable.h" 33#include "xfs_itable.h"
42#include "xfs_dfrag.h" 34#include "xfs_dfrag.h"
43#include "xfs_error.h" 35#include "xfs_error.h"
44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 36#include "xfs_vnodeops.h"
46#include "xfs_trace.h" 37#include "xfs_trace.h"
47 38
@@ -425,11 +416,8 @@ xfs_swap_extents(
425 } 416 }
426 417
427 418
428 IHOLD(ip); 419 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 420 xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
430
431 IHOLD(tip);
432 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
433 421
434 xfs_trans_log_inode(tp, ip, ilf_fields); 422 xfs_trans_log_inode(tp, ip, ilf_fields);
435 xfs_trans_log_inode(tp, tip, tilf_fields); 423 xfs_trans_log_inode(tp, tip, tilf_fields);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5b153b2e6a3..dffba9ba0db6 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -49,8 +49,9 @@ typedef struct xfs_dinode {
49 __be32 di_uid; /* owner's user id */ 49 __be32 di_uid; /* owner's user id */
50 __be32 di_gid; /* owner's group id */ 50 __be32 di_gid; /* owner's group id */
51 __be32 di_nlink; /* number of links to file */ 51 __be32 di_nlink; /* number of links to file */
52 __be16 di_projid; /* owner's project id */ 52 __be16 di_projid_lo; /* lower part of owner's project id */
53 __u8 di_pad[8]; /* unused, zeroed space */ 53 __be16 di_projid_hi; /* higher part owner's project id */
54 __u8 di_pad[6]; /* unused, zeroed space */
54 __be16 di_flushiter; /* incremented on flush */ 55 __be16 di_flushiter; /* incremented on flush */
55 xfs_timestamp_t di_atime; /* time last accessed */ 56 xfs_timestamp_t di_atime; /* time last accessed */
56 xfs_timestamp_t di_mtime; /* time last modified */ 57 xfs_timestamp_t di_mtime; /* time last modified */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 42520f041265..a1321bc7f192 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -25,13 +25,11 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
33#include "xfs_dir2_sf.h" 32#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 33#include "xfs_dinode.h"
36#include "xfs_inode.h" 34#include "xfs_inode.h"
37#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -382,7 +380,7 @@ xfs_readdir(
382 int rval; /* return value */ 380 int rval; /* return value */
383 int v; /* type-checking value */ 381 int v; /* type-checking value */
384 382
385 xfs_itrace_entry(dp); 383 trace_xfs_readdir(dp);
386 384
387 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 385 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
388 return XFS_ERROR(EIO); 386 return XFS_ERROR(EIO);
@@ -549,7 +547,7 @@ xfs_dir2_grow_inode(
549 if ((error = xfs_bmapi(tp, dp, bno, count, 547 if ((error = xfs_bmapi(tp, dp, bno, count,
550 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 548 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
551 args->firstblock, args->total, &map, &nmap, 549 args->firstblock, args->total, &map, &nmap,
552 args->flist, NULL))) 550 args->flist)))
553 return error; 551 return error;
554 ASSERT(nmap <= 1); 552 ASSERT(nmap <= 1);
555 if (nmap == 1) { 553 if (nmap == 1) {
@@ -581,8 +579,7 @@ xfs_dir2_grow_inode(
581 if ((error = xfs_bmapi(tp, dp, b, c, 579 if ((error = xfs_bmapi(tp, dp, b, c,
582 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, 580 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
583 args->firstblock, args->total, 581 args->firstblock, args->total,
584 &mapp[mapi], &nmap, args->flist, 582 &mapp[mapi], &nmap, args->flist))) {
585 NULL))) {
586 kmem_free(mapp); 583 kmem_free(mapp);
587 return error; 584 return error;
588 } 585 }
@@ -715,7 +712,7 @@ xfs_dir2_shrink_inode(
715 */ 712 */
716 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, 713 if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
717 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, 714 XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
718 NULL, &done))) { 715 &done))) {
719 /* 716 /*
720 * ENOSPC actually can happen if we're in a removename with 717 * ENOSPC actually can happen if we're in a removename with
721 * no space reservation, and the resulting block removal 718 * no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 779a267b0a84..580d99cef9e7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -1073,10 +1071,10 @@ xfs_dir2_sf_to_block(
1073 */ 1071 */
1074 1072
1075 buf_len = dp->i_df.if_bytes; 1073 buf_len = dp->i_df.if_bytes;
1076 buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); 1074 buf = kmem_alloc(buf_len, KM_SLEEP);
1077 1075
1078 memcpy(buf, sfp, dp->i_df.if_bytes); 1076 memcpy(buf, sfp, buf_len);
1079 xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); 1077 xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
1080 dp->i_d.di_size = 0; 1078 dp->i_d.di_size = 0;
1081 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1079 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1082 /* 1080 /*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 498f8d694330..921595b84f5b 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2_data.h"
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e2d89854ec9e..ae891223be90 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -25,11 +25,9 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dir2_sf.h" 31#include "xfs_dir2_sf.h"
34#include "xfs_dinode.h" 32#include "xfs_dinode.h"
35#include "xfs_inode.h" 33#include "xfs_inode.h"
@@ -875,7 +873,7 @@ xfs_dir2_leaf_getdents(
875 xfs_dir2_byte_to_da(mp, 873 xfs_dir2_byte_to_da(mp,
876 XFS_DIR2_LEAF_OFFSET) - map_off, 874 XFS_DIR2_LEAF_OFFSET) - map_off,
877 XFS_BMAPI_METADATA, NULL, 0, 875 XFS_BMAPI_METADATA, NULL, 0,
878 &map[map_valid], &nmap, NULL, NULL); 876 &map[map_valid], &nmap, NULL);
879 /* 877 /*
880 * Don't know if we should ignore this or 878 * Don't know if we should ignore this or
881 * try to return an error. 879 * try to return an error.
@@ -963,7 +961,7 @@ xfs_dir2_leaf_getdents(
963 if (i > ra_current && 961 if (i > ra_current &&
964 map[ra_index].br_blockcount >= 962 map[ra_index].br_blockcount >=
965 mp->m_dirblkfsbs) { 963 mp->m_dirblkfsbs) {
966 xfs_baread(mp->m_ddev_targp, 964 xfs_buf_readahead(mp->m_ddev_targp,
967 XFS_FSB_TO_DADDR(mp, 965 XFS_FSB_TO_DADDR(mp,
968 map[ra_index].br_startblock + 966 map[ra_index].br_startblock +
969 ra_offset), 967 ra_offset),
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 78fc4d9ae756..f9a0864b696a 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_bmap.h" 33#include "xfs_bmap.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1a5945d463a..b1bae6b1eed9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -24,12 +24,10 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
deleted file mode 100644
index 2813cdd72375..000000000000
--- a/fs/xfs/xfs_dmapi.h
+++ /dev/null
@@ -1,170 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__
20
21/* Values used to define the on-disk version of dm_attrname_t. All
22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
23 *
24 * In the on-disk inode, DMAPI attribute names consist of the user-provided
25 * name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be
26 * changed.
27 */
28
29#define DMATTR_PREFIXLEN 8
30#define DMATTR_PREFIXSTRING "SGI_DMI_"
31
32typedef enum {
33 DM_EVENT_INVALID = -1,
34 DM_EVENT_CANCEL = 0, /* not supported */
35 DM_EVENT_MOUNT = 1,
36 DM_EVENT_PREUNMOUNT = 2,
37 DM_EVENT_UNMOUNT = 3,
38 DM_EVENT_DEBUT = 4, /* not supported */
39 DM_EVENT_CREATE = 5,
40 DM_EVENT_CLOSE = 6, /* not supported */
41 DM_EVENT_POSTCREATE = 7,
42 DM_EVENT_REMOVE = 8,
43 DM_EVENT_POSTREMOVE = 9,
44 DM_EVENT_RENAME = 10,
45 DM_EVENT_POSTRENAME = 11,
46 DM_EVENT_LINK = 12,
47 DM_EVENT_POSTLINK = 13,
48 DM_EVENT_SYMLINK = 14,
49 DM_EVENT_POSTSYMLINK = 15,
50 DM_EVENT_READ = 16,
51 DM_EVENT_WRITE = 17,
52 DM_EVENT_TRUNCATE = 18,
53 DM_EVENT_ATTRIBUTE = 19,
54 DM_EVENT_DESTROY = 20,
55 DM_EVENT_NOSPACE = 21,
56 DM_EVENT_USER = 22,
57 DM_EVENT_MAX = 23
58} dm_eventtype_t;
59#define HAVE_DM_EVENTTYPE_T
60
61typedef enum {
62 DM_RIGHT_NULL,
63 DM_RIGHT_SHARED,
64 DM_RIGHT_EXCL
65} dm_right_t;
66#define HAVE_DM_RIGHT_T
67
68/* Defines for determining if an event message should be sent. */
69#ifdef HAVE_DMAPI
70#define DM_EVENT_ENABLED(ip, event) ( \
71 unlikely ((ip)->i_mount->m_flags & XFS_MOUNT_DMAPI) && \
72 ( ((ip)->i_d.di_dmevmask & (1 << event)) || \
73 ((ip)->i_mount->m_dmevmask & (1 << event)) ) \
74 )
75#else
76#define DM_EVENT_ENABLED(ip, event) (0)
77#endif
78
79#define DM_XFS_VALID_FS_EVENTS ( \
80 (1 << DM_EVENT_PREUNMOUNT) | \
81 (1 << DM_EVENT_UNMOUNT) | \
82 (1 << DM_EVENT_NOSPACE) | \
83 (1 << DM_EVENT_DEBUT) | \
84 (1 << DM_EVENT_CREATE) | \
85 (1 << DM_EVENT_POSTCREATE) | \
86 (1 << DM_EVENT_REMOVE) | \
87 (1 << DM_EVENT_POSTREMOVE) | \
88 (1 << DM_EVENT_RENAME) | \
89 (1 << DM_EVENT_POSTRENAME) | \
90 (1 << DM_EVENT_LINK) | \
91 (1 << DM_EVENT_POSTLINK) | \
92 (1 << DM_EVENT_SYMLINK) | \
93 (1 << DM_EVENT_POSTSYMLINK) | \
94 (1 << DM_EVENT_ATTRIBUTE) | \
95 (1 << DM_EVENT_DESTROY) )
96
97/* Events valid in dm_set_eventlist() when called with a file handle for
98 a regular file or a symlink. These events are persistent.
99*/
100
101#define DM_XFS_VALID_FILE_EVENTS ( \
102 (1 << DM_EVENT_ATTRIBUTE) | \
103 (1 << DM_EVENT_DESTROY) )
104
105/* Events valid in dm_set_eventlist() when called with a file handle for
106 a directory. These events are persistent.
107*/
108
109#define DM_XFS_VALID_DIRECTORY_EVENTS ( \
110 (1 << DM_EVENT_CREATE) | \
111 (1 << DM_EVENT_POSTCREATE) | \
112 (1 << DM_EVENT_REMOVE) | \
113 (1 << DM_EVENT_POSTREMOVE) | \
114 (1 << DM_EVENT_RENAME) | \
115 (1 << DM_EVENT_POSTRENAME) | \
116 (1 << DM_EVENT_LINK) | \
117 (1 << DM_EVENT_POSTLINK) | \
118 (1 << DM_EVENT_SYMLINK) | \
119 (1 << DM_EVENT_POSTSYMLINK) | \
120 (1 << DM_EVENT_ATTRIBUTE) | \
121 (1 << DM_EVENT_DESTROY) )
122
123/* Events supported by the XFS filesystem. */
124#define DM_XFS_SUPPORTED_EVENTS ( \
125 (1 << DM_EVENT_MOUNT) | \
126 (1 << DM_EVENT_PREUNMOUNT) | \
127 (1 << DM_EVENT_UNMOUNT) | \
128 (1 << DM_EVENT_NOSPACE) | \
129 (1 << DM_EVENT_CREATE) | \
130 (1 << DM_EVENT_POSTCREATE) | \
131 (1 << DM_EVENT_REMOVE) | \
132 (1 << DM_EVENT_POSTREMOVE) | \
133 (1 << DM_EVENT_RENAME) | \
134 (1 << DM_EVENT_POSTRENAME) | \
135 (1 << DM_EVENT_LINK) | \
136 (1 << DM_EVENT_POSTLINK) | \
137 (1 << DM_EVENT_SYMLINK) | \
138 (1 << DM_EVENT_POSTSYMLINK) | \
139 (1 << DM_EVENT_READ) | \
140 (1 << DM_EVENT_WRITE) | \
141 (1 << DM_EVENT_TRUNCATE) | \
142 (1 << DM_EVENT_ATTRIBUTE) | \
143 (1 << DM_EVENT_DESTROY) )
144
145
146/*
147 * Definitions used for the flags field on dm_send_*_event().
148 */
149
150#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
151#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
152#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */
153#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */
154#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
155
156/*
157 * Pull in platform specific event flags defines
158 */
159#include "xfs_dmapi_priv.h"
160
161/*
162 * Macros to turn caller specified delay/block flags into
163 * dm_send_xxxx_event flag DM_FLAGS_NDELAY.
164 */
165
166#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
167 DM_FLAGS_NDELAY : 0)
168#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
169
170#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
deleted file mode 100644
index e71e2581c0c3..000000000000
--- a/fs/xfs/xfs_dmops.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_trans.h"
23#include "xfs_sb.h"
24#include "xfs_dmapi.h"
25#include "xfs_inum.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28
29
30static struct xfs_dmops xfs_dmcore_stub = {
31 .xfs_send_data = (xfs_send_data_t)fs_nosys,
32 .xfs_send_mmap = (xfs_send_mmap_t)fs_noerr,
33 .xfs_send_destroy = (xfs_send_destroy_t)fs_nosys,
34 .xfs_send_namesp = (xfs_send_namesp_t)fs_nosys,
35 .xfs_send_mount = (xfs_send_mount_t)fs_nosys,
36 .xfs_send_unmount = (xfs_send_unmount_t)fs_noerr,
37};
38
39int
40xfs_dmops_get(struct xfs_mount *mp)
41{
42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
43 cmn_err(CE_WARN,
44 "XFS: dmapi support not available in this kernel.");
45 return EINVAL;
46 }
47
48 mp->m_dm_ops = &xfs_dmcore_stub;
49 return 0;
50}
51
52void
53xfs_dmops_put(struct xfs_mount *mp)
54{
55}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 047b8a8e5c29..ed9990267661 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -23,12 +23,8 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 26#include "xfs_mount.h"
29#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_attr_sf.h"
32#include "xfs_dinode.h" 28#include "xfs_dinode.h"
33#include "xfs_inode.h" 29#include "xfs_inode.h"
34#include "xfs_utils.h" 30#include "xfs_utils.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 409fe81585fd..a55e687bf562 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -24,7 +24,6 @@
24#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_extfree_item.h" 29#include "xfs_extfree_item.h"
@@ -33,18 +32,19 @@
33kmem_zone_t *xfs_efi_zone; 32kmem_zone_t *xfs_efi_zone;
34kmem_zone_t *xfs_efd_zone; 33kmem_zone_t *xfs_efd_zone;
35 34
36STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); 35static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
36{
37 return container_of(lip, struct xfs_efi_log_item, efi_item);
38}
37 39
38void 40void
39xfs_efi_item_free(xfs_efi_log_item_t *efip) 41xfs_efi_item_free(
42 struct xfs_efi_log_item *efip)
40{ 43{
41 int nexts = efip->efi_format.efi_nextents; 44 if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip); 45 kmem_free(efip);
45 } else { 46 else
46 kmem_zone_free(xfs_efi_zone, efip); 47 kmem_zone_free(xfs_efi_zone, efip);
47 }
48} 48}
49 49
50/* 50/*
@@ -52,9 +52,9 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
52 * We only need 1 iovec for an efi item. It just logs the efi_log_format 52 * We only need 1 iovec for an efi item. It just logs the efi_log_format
53 * structure. 53 * structure.
54 */ 54 */
55/*ARGSUSED*/
56STATIC uint 55STATIC uint
57xfs_efi_item_size(xfs_efi_log_item_t *efip) 56xfs_efi_item_size(
57 struct xfs_log_item *lip)
58{ 58{
59 return 1; 59 return 1;
60} 60}
@@ -67,10 +67,12 @@ xfs_efi_item_size(xfs_efi_log_item_t *efip)
67 * slots in the efi item have been filled. 67 * slots in the efi item have been filled.
68 */ 68 */
69STATIC void 69STATIC void
70xfs_efi_item_format(xfs_efi_log_item_t *efip, 70xfs_efi_item_format(
71 xfs_log_iovec_t *log_vector) 71 struct xfs_log_item *lip,
72 struct xfs_log_iovec *log_vector)
72{ 73{
73 uint size; 74 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
75 uint size;
74 76
75 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); 77 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
76 78
@@ -80,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
80 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); 82 size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
81 efip->efi_format.efi_size = 1; 83 efip->efi_format.efi_size = 1;
82 84
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 85 log_vector->i_addr = &efip->efi_format;
84 log_vector->i_len = size; 86 log_vector->i_len = size;
85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; 87 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 88 ASSERT(size >= sizeof(xfs_efi_log_format_t));
@@ -90,60 +92,33 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
90/* 92/*
91 * Pinning has no meaning for an efi item, so just return. 93 * Pinning has no meaning for an efi item, so just return.
92 */ 94 */
93/*ARGSUSED*/
94STATIC void 95STATIC void
95xfs_efi_item_pin(xfs_efi_log_item_t *efip) 96xfs_efi_item_pin(
97 struct xfs_log_item *lip)
96{ 98{
97 return;
98} 99}
99 100
100
101/* 101/*
102 * While EFIs cannot really be pinned, the unpin operation is the 102 * While EFIs cannot really be pinned, the unpin operation is the
103 * last place at which the EFI is manipulated during a transaction. 103 * last place at which the EFI is manipulated during a transaction.
104 * Here we coordinate with xfs_efi_cancel() to determine who gets to 104 * Here we coordinate with xfs_efi_cancel() to determine who gets to
105 * free the EFI. 105 * free the EFI.
106 */ 106 */
107/*ARGSUSED*/
108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
110{
111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112
113 spin_lock(&ailp->xa_lock);
114 if (efip->efi_flags & XFS_EFI_CANCELED) {
115 /* xfs_trans_ail_delete() drops the AIL lock. */
116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
117 xfs_efi_item_free(efip);
118 } else {
119 efip->efi_flags |= XFS_EFI_COMMITTED;
120 spin_unlock(&ailp->xa_lock);
121 }
122}
123
124/*
125 * like unpin only we have to also clear the xaction descriptor
126 * pointing the log item if we free the item. This routine duplicates
127 * unpin because efi_flags is protected by the AIL lock. Freeing
128 * the descriptor and then calling unpin would force us to drop the AIL
129 * lock which would open up a race condition.
130 */
131STATIC void 107STATIC void
132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 108xfs_efi_item_unpin(
109 struct xfs_log_item *lip,
110 int remove)
133{ 111{
134 struct xfs_ail *ailp = efip->efi_item.li_ailp; 112 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
135 xfs_log_item_desc_t *lidp; 113 struct xfs_ail *ailp = lip->li_ailp;
136 114
137 spin_lock(&ailp->xa_lock); 115 spin_lock(&ailp->xa_lock);
138 if (efip->efi_flags & XFS_EFI_CANCELED) { 116 if (efip->efi_flags & XFS_EFI_CANCELED) {
139 /* 117 if (remove)
140 * free the xaction descriptor pointing to this item 118 xfs_trans_del_item(lip);
141 */
142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
143 xfs_trans_free_item(tp, lidp);
144 119
145 /* xfs_trans_ail_delete() drops the AIL lock. */ 120 /* xfs_trans_ail_delete() drops the AIL lock. */
146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); 121 xfs_trans_ail_delete(ailp, lip);
147 xfs_efi_item_free(efip); 122 xfs_efi_item_free(efip);
148 } else { 123 } else {
149 efip->efi_flags |= XFS_EFI_COMMITTED; 124 efip->efi_flags |= XFS_EFI_COMMITTED;
@@ -158,9 +133,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
158 * XFS_ITEM_PINNED so that the caller will eventually flush the log. 133 * XFS_ITEM_PINNED so that the caller will eventually flush the log.
159 * This should help in getting the EFI out of the AIL. 134 * This should help in getting the EFI out of the AIL.
160 */ 135 */
161/*ARGSUSED*/
162STATIC uint 136STATIC uint
163xfs_efi_item_trylock(xfs_efi_log_item_t *efip) 137xfs_efi_item_trylock(
138 struct xfs_log_item *lip)
164{ 139{
165 return XFS_ITEM_PINNED; 140 return XFS_ITEM_PINNED;
166} 141}
@@ -168,13 +143,12 @@ xfs_efi_item_trylock(xfs_efi_log_item_t *efip)
168/* 143/*
169 * Efi items have no locking, so just return. 144 * Efi items have no locking, so just return.
170 */ 145 */
171/*ARGSUSED*/
172STATIC void 146STATIC void
173xfs_efi_item_unlock(xfs_efi_log_item_t *efip) 147xfs_efi_item_unlock(
148 struct xfs_log_item *lip)
174{ 149{
175 if (efip->efi_item.li_flags & XFS_LI_ABORTED) 150 if (lip->li_flags & XFS_LI_ABORTED)
176 xfs_efi_item_free(efip); 151 xfs_efi_item_free(EFI_ITEM(lip));
177 return;
178} 152}
179 153
180/* 154/*
@@ -183,9 +157,10 @@ xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
183 * flag is not paid any attention here. Checking for that is delayed 157 * flag is not paid any attention here. Checking for that is delayed
184 * until the EFI is unpinned. 158 * until the EFI is unpinned.
185 */ 159 */
186/*ARGSUSED*/
187STATIC xfs_lsn_t 160STATIC xfs_lsn_t
188xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 161xfs_efi_item_committed(
162 struct xfs_log_item *lip,
163 xfs_lsn_t lsn)
189{ 164{
190 return lsn; 165 return lsn;
191} 166}
@@ -195,11 +170,10 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
195 * stuck waiting for all of its corresponding efd items to be 170 * stuck waiting for all of its corresponding efd items to be
196 * committed to disk. 171 * committed to disk.
197 */ 172 */
198/*ARGSUSED*/
199STATIC void 173STATIC void
200xfs_efi_item_push(xfs_efi_log_item_t *efip) 174xfs_efi_item_push(
175 struct xfs_log_item *lip)
201{ 176{
202 return;
203} 177}
204 178
205/* 179/*
@@ -209,61 +183,55 @@ xfs_efi_item_push(xfs_efi_log_item_t *efip)
209 * example, for inodes, the inode is locked throughout the extent freeing 183 * example, for inodes, the inode is locked throughout the extent freeing
210 * so the dependency should be recorded there. 184 * so the dependency should be recorded there.
211 */ 185 */
212/*ARGSUSED*/
213STATIC void 186STATIC void
214xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) 187xfs_efi_item_committing(
188 struct xfs_log_item *lip,
189 xfs_lsn_t lsn)
215{ 190{
216 return;
217} 191}
218 192
219/* 193/*
220 * This is the ops vector shared by all efi log items. 194 * This is the ops vector shared by all efi log items.
221 */ 195 */
222static struct xfs_item_ops xfs_efi_item_ops = { 196static struct xfs_item_ops xfs_efi_item_ops = {
223 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, 197 .iop_size = xfs_efi_item_size,
224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 198 .iop_format = xfs_efi_item_format,
225 xfs_efi_item_format, 199 .iop_pin = xfs_efi_item_pin,
226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, 200 .iop_unpin = xfs_efi_item_unpin,
227 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin, 201 .iop_trylock = xfs_efi_item_trylock,
228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 202 .iop_unlock = xfs_efi_item_unlock,
229 xfs_efi_item_unpin_remove, 203 .iop_committed = xfs_efi_item_committed,
230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, 204 .iop_push = xfs_efi_item_push,
231 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efi_item_unlock, 205 .iop_committing = xfs_efi_item_committing
232 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
233 xfs_efi_item_committed,
234 .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
235 .iop_pushbuf = NULL,
236 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
237 xfs_efi_item_committing
238}; 206};
239 207
240 208
241/* 209/*
242 * Allocate and initialize an efi item with the given number of extents. 210 * Allocate and initialize an efi item with the given number of extents.
243 */ 211 */
244xfs_efi_log_item_t * 212struct xfs_efi_log_item *
245xfs_efi_init(xfs_mount_t *mp, 213xfs_efi_init(
246 uint nextents) 214 struct xfs_mount *mp,
215 uint nextents)
247 216
248{ 217{
249 xfs_efi_log_item_t *efip; 218 struct xfs_efi_log_item *efip;
250 uint size; 219 uint size;
251 220
252 ASSERT(nextents > 0); 221 ASSERT(nextents > 0);
253 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { 222 if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
254 size = (uint)(sizeof(xfs_efi_log_item_t) + 223 size = (uint)(sizeof(xfs_efi_log_item_t) +
255 ((nextents - 1) * sizeof(xfs_extent_t))); 224 ((nextents - 1) * sizeof(xfs_extent_t)));
256 efip = (xfs_efi_log_item_t*)kmem_zalloc(size, KM_SLEEP); 225 efip = kmem_zalloc(size, KM_SLEEP);
257 } else { 226 } else {
258 efip = (xfs_efi_log_item_t*)kmem_zone_zalloc(xfs_efi_zone, 227 efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP);
259 KM_SLEEP);
260 } 228 }
261 229
262 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 230 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
263 efip->efi_format.efi_nextents = nextents; 231 efip->efi_format.efi_nextents = nextents;
264 efip->efi_format.efi_id = (__psint_t)(void*)efip; 232 efip->efi_format.efi_id = (__psint_t)(void*)efip;
265 233
266 return (efip); 234 return efip;
267} 235}
268 236
269/* 237/*
@@ -276,7 +244,7 @@ xfs_efi_init(xfs_mount_t *mp,
276int 244int
277xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) 245xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
278{ 246{
279 xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr; 247 xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
280 uint i; 248 uint i;
281 uint len = sizeof(xfs_efi_log_format_t) + 249 uint len = sizeof(xfs_efi_log_format_t) +
282 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); 250 (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
@@ -289,8 +257,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
289 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); 257 memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
290 return 0; 258 return 0;
291 } else if (buf->i_len == len32) { 259 } else if (buf->i_len == len32) {
292 xfs_efi_log_format_32_t *src_efi_fmt_32 = 260 xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
293 (xfs_efi_log_format_32_t *)buf->i_addr;
294 261
295 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; 262 dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
296 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; 263 dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -304,8 +271,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
304 } 271 }
305 return 0; 272 return 0;
306 } else if (buf->i_len == len64) { 273 } else if (buf->i_len == len64) {
307 xfs_efi_log_format_64_t *src_efi_fmt_64 = 274 xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr;
308 (xfs_efi_log_format_64_t *)buf->i_addr;
309 275
310 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; 276 dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
311 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; 277 dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
@@ -356,16 +322,18 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
356 } 322 }
357} 323}
358 324
359STATIC void 325static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
360xfs_efd_item_free(xfs_efd_log_item_t *efdp)
361{ 326{
362 int nexts = efdp->efd_format.efd_nextents; 327 return container_of(lip, struct xfs_efd_log_item, efd_item);
328}
363 329
364 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 330STATIC void
331xfs_efd_item_free(struct xfs_efd_log_item *efdp)
332{
333 if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
365 kmem_free(efdp); 334 kmem_free(efdp);
366 } else { 335 else
367 kmem_zone_free(xfs_efd_zone, efdp); 336 kmem_zone_free(xfs_efd_zone, efdp);
368 }
369} 337}
370 338
371/* 339/*
@@ -373,9 +341,9 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
373 * We only need 1 iovec for an efd item. It just logs the efd_log_format 341 * We only need 1 iovec for an efd item. It just logs the efd_log_format
374 * structure. 342 * structure.
375 */ 343 */
376/*ARGSUSED*/
377STATIC uint 344STATIC uint
378xfs_efd_item_size(xfs_efd_log_item_t *efdp) 345xfs_efd_item_size(
346 struct xfs_log_item *lip)
379{ 347{
380 return 1; 348 return 1;
381} 349}
@@ -388,10 +356,12 @@ xfs_efd_item_size(xfs_efd_log_item_t *efdp)
388 * slots in the efd item have been filled. 356 * slots in the efd item have been filled.
389 */ 357 */
390STATIC void 358STATIC void
391xfs_efd_item_format(xfs_efd_log_item_t *efdp, 359xfs_efd_item_format(
392 xfs_log_iovec_t *log_vector) 360 struct xfs_log_item *lip,
361 struct xfs_log_iovec *log_vector)
393{ 362{
394 uint size; 363 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
364 uint size;
395 365
396 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); 366 ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
397 367
@@ -401,48 +371,38 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
401 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); 371 size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
402 efdp->efd_format.efd_size = 1; 372 efdp->efd_format.efd_size = 1;
403 373
404 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 374 log_vector->i_addr = &efdp->efd_format;
405 log_vector->i_len = size; 375 log_vector->i_len = size;
406 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; 376 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
407 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 377 ASSERT(size >= sizeof(xfs_efd_log_format_t));
408} 378}
409 379
410
411/* 380/*
412 * Pinning has no meaning for an efd item, so just return. 381 * Pinning has no meaning for an efd item, so just return.
413 */ 382 */
414/*ARGSUSED*/
415STATIC void 383STATIC void
416xfs_efd_item_pin(xfs_efd_log_item_t *efdp) 384xfs_efd_item_pin(
385 struct xfs_log_item *lip)
417{ 386{
418 return;
419} 387}
420 388
421
422/* 389/*
423 * Since pinning has no meaning for an efd item, unpinning does 390 * Since pinning has no meaning for an efd item, unpinning does
424 * not either. 391 * not either.
425 */ 392 */
426/*ARGSUSED*/
427STATIC void
428xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
429{
430 return;
431}
432
433/*ARGSUSED*/
434STATIC void 393STATIC void
435xfs_efd_item_unpin_remove(xfs_efd_log_item_t *efdp, xfs_trans_t *tp) 394xfs_efd_item_unpin(
395 struct xfs_log_item *lip,
396 int remove)
436{ 397{
437 return;
438} 398}
439 399
440/* 400/*
441 * Efd items have no locking, so just return success. 401 * Efd items have no locking, so just return success.
442 */ 402 */
443/*ARGSUSED*/
444STATIC uint 403STATIC uint
445xfs_efd_item_trylock(xfs_efd_log_item_t *efdp) 404xfs_efd_item_trylock(
405 struct xfs_log_item *lip)
446{ 406{
447 return XFS_ITEM_LOCKED; 407 return XFS_ITEM_LOCKED;
448} 408}
@@ -451,13 +411,12 @@ xfs_efd_item_trylock(xfs_efd_log_item_t *efdp)
451 * Efd items have no locking or pushing, so return failure 411 * Efd items have no locking or pushing, so return failure
452 * so that the caller doesn't bother with us. 412 * so that the caller doesn't bother with us.
453 */ 413 */
454/*ARGSUSED*/
455STATIC void 414STATIC void
456xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) 415xfs_efd_item_unlock(
416 struct xfs_log_item *lip)
457{ 417{
458 if (efdp->efd_item.li_flags & XFS_LI_ABORTED) 418 if (lip->li_flags & XFS_LI_ABORTED)
459 xfs_efd_item_free(efdp); 419 xfs_efd_item_free(EFD_ITEM(lip));
460 return;
461} 420}
462 421
463/* 422/*
@@ -467,15 +426,18 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
467 * return -1 to keep the transaction code from further referencing 426 * return -1 to keep the transaction code from further referencing
468 * this item. 427 * this item.
469 */ 428 */
470/*ARGSUSED*/
471STATIC xfs_lsn_t 429STATIC xfs_lsn_t
472xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) 430xfs_efd_item_committed(
431 struct xfs_log_item *lip,
432 xfs_lsn_t lsn)
473{ 433{
434 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
435
474 /* 436 /*
475 * If we got a log I/O error, it's always the case that the LR with the 437 * If we got a log I/O error, it's always the case that the LR with the
476 * EFI got unpinned and freed before the EFD got aborted. 438 * EFI got unpinned and freed before the EFD got aborted.
477 */ 439 */
478 if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) 440 if (!(lip->li_flags & XFS_LI_ABORTED))
479 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); 441 xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
480 442
481 xfs_efd_item_free(efdp); 443 xfs_efd_item_free(efdp);
@@ -486,11 +448,10 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
486 * There isn't much you can do to push on an efd item. It is simply 448 * There isn't much you can do to push on an efd item. It is simply
487 * stuck waiting for the log to be flushed to disk. 449 * stuck waiting for the log to be flushed to disk.
488 */ 450 */
489/*ARGSUSED*/
490STATIC void 451STATIC void
491xfs_efd_item_push(xfs_efd_log_item_t *efdp) 452xfs_efd_item_push(
453 struct xfs_log_item *lip)
492{ 454{
493 return;
494} 455}
495 456
496/* 457/*
@@ -500,55 +461,48 @@ xfs_efd_item_push(xfs_efd_log_item_t *efdp)
500 * example, for inodes, the inode is locked throughout the extent freeing 461 * example, for inodes, the inode is locked throughout the extent freeing
501 * so the dependency should be recorded there. 462 * so the dependency should be recorded there.
502 */ 463 */
503/*ARGSUSED*/
504STATIC void 464STATIC void
505xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) 465xfs_efd_item_committing(
466 struct xfs_log_item *lip,
467 xfs_lsn_t lsn)
506{ 468{
507 return;
508} 469}
509 470
510/* 471/*
511 * This is the ops vector shared by all efd log items. 472 * This is the ops vector shared by all efd log items.
512 */ 473 */
513static struct xfs_item_ops xfs_efd_item_ops = { 474static struct xfs_item_ops xfs_efd_item_ops = {
514 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, 475 .iop_size = xfs_efd_item_size,
515 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 476 .iop_format = xfs_efd_item_format,
516 xfs_efd_item_format, 477 .iop_pin = xfs_efd_item_pin,
517 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, 478 .iop_unpin = xfs_efd_item_unpin,
518 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin, 479 .iop_trylock = xfs_efd_item_trylock,
519 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 480 .iop_unlock = xfs_efd_item_unlock,
520 xfs_efd_item_unpin_remove, 481 .iop_committed = xfs_efd_item_committed,
521 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, 482 .iop_push = xfs_efd_item_push,
522 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efd_item_unlock, 483 .iop_committing = xfs_efd_item_committing
523 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
524 xfs_efd_item_committed,
525 .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
526 .iop_pushbuf = NULL,
527 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
528 xfs_efd_item_committing
529}; 484};
530 485
531
532/* 486/*
533 * Allocate and initialize an efd item with the given number of extents. 487 * Allocate and initialize an efd item with the given number of extents.
534 */ 488 */
535xfs_efd_log_item_t * 489struct xfs_efd_log_item *
536xfs_efd_init(xfs_mount_t *mp, 490xfs_efd_init(
537 xfs_efi_log_item_t *efip, 491 struct xfs_mount *mp,
538 uint nextents) 492 struct xfs_efi_log_item *efip,
493 uint nextents)
539 494
540{ 495{
541 xfs_efd_log_item_t *efdp; 496 struct xfs_efd_log_item *efdp;
542 uint size; 497 uint size;
543 498
544 ASSERT(nextents > 0); 499 ASSERT(nextents > 0);
545 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 500 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
546 size = (uint)(sizeof(xfs_efd_log_item_t) + 501 size = (uint)(sizeof(xfs_efd_log_item_t) +
547 ((nextents - 1) * sizeof(xfs_extent_t))); 502 ((nextents - 1) * sizeof(xfs_extent_t)));
548 efdp = (xfs_efd_log_item_t*)kmem_zalloc(size, KM_SLEEP); 503 efdp = kmem_zalloc(size, KM_SLEEP);
549 } else { 504 } else {
550 efdp = (xfs_efd_log_item_t*)kmem_zone_zalloc(xfs_efd_zone, 505 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
551 KM_SLEEP);
552 } 506 }
553 507
554 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 508 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
@@ -556,5 +510,5 @@ xfs_efd_init(xfs_mount_t *mp,
556 efdp->efd_format.efd_nextents = nextents; 510 efdp->efd_format.efd_nextents = nextents;
557 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 511 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
558 512
559 return (efdp); 513 return efdp;
560} 514}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 390850ee6603..9b715dce5699 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,13 +18,9 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bmap_btree.h" 19#include "xfs_bmap_btree.h"
20#include "xfs_inum.h" 20#include "xfs_inum.h"
21#include "xfs_dir2.h"
22#include "xfs_dir2_sf.h"
23#include "xfs_attr_sf.h"
24#include "xfs_dinode.h" 21#include "xfs_dinode.h"
25#include "xfs_inode.h" 22#include "xfs_inode.h"
26#include "xfs_ag.h" 23#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_log.h" 24#include "xfs_log.h"
29#include "xfs_trans.h" 25#include "xfs_trans.h"
30#include "xfs_sb.h" 26#include "xfs_sb.h"
@@ -127,6 +123,82 @@ typedef struct fstrm_item
127 xfs_inode_t *pip; /* Parent directory inode pointer. */ 123 xfs_inode_t *pip; /* Parent directory inode pointer. */
128} fstrm_item_t; 124} fstrm_item_t;
129 125
126/*
127 * Allocation group filestream associations are tracked with per-ag atomic
128 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
129 * particular AG already has active filestreams associated with it. The mount
130 * point's m_peraglock is used to protect these counters from per-ag array
131 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
132 * about to reallocate the array, it calls xfs_filestream_flush() with the
133 * m_peraglock held in write mode.
134 *
135 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
136 * the cache elements have finished executing before it returns, it's safe for
137 * the free functions to use the atomic counters without m_peraglock protection.
138 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
139 * whether it was called with the m_peraglock held in read mode, write mode or
140 * not held at all. The race condition this addresses is the following:
141 *
142 * - The work queue scheduler fires and pulls a filestream directory cache
143 * element off the LRU end of the cache for deletion, then gets pre-empted.
144 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
145 * remaining items from the cache and reallocates the mount point's per-ag
146 * array, resetting all the counters to zero.
147 * - The work queue thread resumes and calls the free function for the element
148 * it started cleaning up earlier. In the process it decrements the
149 * filestreams counter for an AG that now has no references.
150 *
151 * With a shrinkfs feature, the above scenario could panic the system.
152 *
153 * All other uses of the following macros should be protected by either the
154 * m_peraglock held in read mode, or the cache's internal locking exposed by the
155 * interval between a call to xfs_mru_cache_lookup() and a call to
156 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
157 * when new elements are added to the cache.
158 *
159 * Combined, these locking rules ensure that no associations will ever exist in
160 * the cache that reference per-ag array elements that have since been
161 * reallocated.
162 */
163static int
164xfs_filestream_peek_ag(
165 xfs_mount_t *mp,
166 xfs_agnumber_t agno)
167{
168 struct xfs_perag *pag;
169 int ret;
170
171 pag = xfs_perag_get(mp, agno);
172 ret = atomic_read(&pag->pagf_fstrms);
173 xfs_perag_put(pag);
174 return ret;
175}
176
177static int
178xfs_filestream_get_ag(
179 xfs_mount_t *mp,
180 xfs_agnumber_t agno)
181{
182 struct xfs_perag *pag;
183 int ret;
184
185 pag = xfs_perag_get(mp, agno);
186 ret = atomic_inc_return(&pag->pagf_fstrms);
187 xfs_perag_put(pag);
188 return ret;
189}
190
191static void
192xfs_filestream_put_ag(
193 xfs_mount_t *mp,
194 xfs_agnumber_t agno)
195{
196 struct xfs_perag *pag;
197
198 pag = xfs_perag_get(mp, agno);
199 atomic_dec(&pag->pagf_fstrms);
200 xfs_perag_put(pag);
201}
130 202
131/* 203/*
132 * Scan the AGs starting at startag looking for an AG that isn't in use and has 204 * Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -355,16 +427,14 @@ xfs_fstrm_free_func(
355{ 427{
356 fstrm_item_t *item = (fstrm_item_t *)data; 428 fstrm_item_t *item = (fstrm_item_t *)data;
357 xfs_inode_t *ip = item->ip; 429 xfs_inode_t *ip = item->ip;
358 int ref;
359 430
360 ASSERT(ip->i_ino == ino); 431 ASSERT(ip->i_ino == ino);
361 432
362 xfs_iflags_clear(ip, XFS_IFILESTREAM); 433 xfs_iflags_clear(ip, XFS_IFILESTREAM);
363 434
364 /* Drop the reference taken on the AG when the item was added. */ 435 /* Drop the reference taken on the AG when the item was added. */
365 ref = xfs_filestream_put_ag(ip->i_mount, item->ag); 436 xfs_filestream_put_ag(ip->i_mount, item->ag);
366 437
367 ASSERT(ref >= 0);
368 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 438 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
369 xfs_filestream_peek_ag(ip->i_mount, item->ag)); 439 xfs_filestream_peek_ag(ip->i_mount, item->ag));
370 440
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 260f757bbc5d..09dd9af45434 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf;
42 42
43#endif 43#endif
44 44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
85static inline int
86xfs_filestream_peek_ag(
87 xfs_mount_t *mp,
88 xfs_agnumber_t agno)
89{
90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
97}
98
99static inline int
100xfs_filestream_get_ag(
101 xfs_mount_t *mp,
102 xfs_agnumber_t agno)
103{
104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
111}
112
113static inline int
114xfs_filestream_put_ag(
115 xfs_mount_t *mp,
116 xfs_agnumber_t agno)
117{
118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
125}
126
127/* allocation selection flags */ 45/* allocation selection flags */
128typedef enum xfs_fstrm_alloc { 46typedef enum xfs_fstrm_alloc {
129 XFS_PICK_USERDATA = 1, 47 XFS_PICK_USERDATA = 1,
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..8f6fc1a96386 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ 114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ 115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ 116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
117#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */
117#define BMV_IF_VALID \ 118#define BMV_IF_VALID \
118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) 119 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \
120 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
119 121
120/* bmv_oflags values - returned for each non-header segment */ 122/* bmv_oflags values - returned for each non-header segment */
121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 123#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
@@ -291,9 +293,11 @@ typedef struct xfs_bstat {
291 __s32 bs_extsize; /* extent size */ 293 __s32 bs_extsize; /* extent size */
292 __s32 bs_extents; /* number of extents */ 294 __s32 bs_extents; /* number of extents */
293 __u32 bs_gen; /* generation count */ 295 __u32 bs_gen; /* generation count */
294 __u16 bs_projid; /* project id */ 296 __u16 bs_projid_lo; /* lower part of project id */
297#define bs_projid bs_projid_lo /* (previously just bs_projid) */
295 __u16 bs_forkoff; /* inode fork offset in bytes */ 298 __u16 bs_forkoff; /* inode fork offset in bytes */
296 unsigned char bs_pad[12]; /* pad space, unused */ 299 __u16 bs_projid_hi; /* higher part of project id */
300 unsigned char bs_pad[10]; /* pad space, unused */
297 __u32 bs_dmevmask; /* DMIG event mask */ 301 __u32 bs_dmevmask; /* DMIG event mask */
298 __u16 bs_dmstate; /* DMIG state info */ 302 __u16 bs_dmstate; /* DMIG state info */
299 __u16 bs_aextents; /* attribute number of extents */ 303 __u16 bs_aextents; /* attribute number of extents */
@@ -446,6 +450,7 @@ typedef struct xfs_handle {
446/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ 450/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */
447/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ 451/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
448#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) 452#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
453#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
449 454
450/* 455/*
451 * ioctl commands that replace IRIX syssgi()'s 456 * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 37a6f62c57b6..a7c116e814af 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
@@ -148,12 +144,11 @@ xfs_growfs_data_private(
148 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) 144 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
149 return error; 145 return error;
150 dpct = pct - mp->m_sb.sb_imax_pct; 146 dpct = pct - mp->m_sb.sb_imax_pct;
151 error = xfs_read_buf(mp, mp->m_ddev_targp, 147 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
152 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 148 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
153 XFS_FSS_TO_BB(mp, 1), 0, &bp); 149 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
154 if (error) 150 if (!bp)
155 return error; 151 return EIO;
156 ASSERT(bp);
157 xfs_buf_relse(bp); 152 xfs_buf_relse(bp);
158 153
159 new = nb; /* use new as a temporary here */ 154 new = nb; /* use new as a temporary here */
@@ -601,39 +596,44 @@ out:
601 * the extra reserve blocks from the reserve..... 596 * the extra reserve blocks from the reserve.....
602 */ 597 */
603 int error; 598 int error;
604 error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); 599 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
600 fdblks_delta, 0);
605 if (error == ENOSPC) 601 if (error == ENOSPC)
606 goto retry; 602 goto retry;
607 } 603 }
608 return 0; 604 return 0;
609} 605}
610 606
607/*
608 * Dump a transaction into the log that contains no real change. This is needed
609 * to be able to make the log dirty or stamp the current tail LSN into the log
610 * during the covering operation.
611 *
612 * We cannot use an inode here for this - that will push dirty state back up
613 * into the VFS and then periodic inode flushing will prevent log covering from
614 * making progress. Hence we log a field in the superblock instead.
615 */
611int 616int
612xfs_fs_log_dummy( 617xfs_fs_log_dummy(
613 xfs_mount_t *mp) 618 xfs_mount_t *mp,
619 int flags)
614{ 620{
615 xfs_trans_t *tp; 621 xfs_trans_t *tp;
616 xfs_inode_t *ip;
617 int error; 622 int error;
618 623
619 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 624 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
620 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 625 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
626 XFS_DEFAULT_LOG_COUNT);
621 if (error) { 627 if (error) {
622 xfs_trans_cancel(tp, 0); 628 xfs_trans_cancel(tp, 0);
623 return error; 629 return error;
624 } 630 }
625 631
626 ip = mp->m_rootip; 632 /* log the UUID because it is an unchanging field */
627 xfs_ilock(ip, XFS_ILOCK_EXCL); 633 xfs_mod_sb(tp, XFS_SB_UUID);
628 634 if (flags & SYNC_WAIT)
629 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 635 xfs_trans_set_sync(tp);
630 xfs_trans_ihold(tp, ip); 636 return xfs_trans_commit(tp, 0);
631 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
632 xfs_trans_set_sync(tp);
633 error = xfs_trans_commit(tp, 0);
634
635 xfs_iunlock(ip, XFS_ILOCK_EXCL);
636 return error;
637} 637}
638 638
639int 639int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern int xfs_fs_log_dummy(xfs_mount_t *mp); 28extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
29 29
30#endif /* __XFS_FSOPS_H__ */ 30#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c7142a064c48..0626a32c3447 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
@@ -216,7 +212,7 @@ xfs_ialloc_inode_init(
216 * to log a whole cluster of inodes instead of all the 212 * to log a whole cluster of inodes instead of all the
217 * individual transactions causing a lot of log traffic. 213 * individual transactions causing a lot of log traffic.
218 */ 214 */
219 xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 215 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
220 for (i = 0; i < ninodes; i++) { 216 for (i = 0; i < ninodes; i++) {
221 int ioffset = i << mp->m_sb.sb_inodelog; 217 int ioffset = i << mp->m_sb.sb_inodelog;
222 uint isize = sizeof(struct xfs_dinode); 218 uint isize = sizeof(struct xfs_dinode);
@@ -1217,7 +1213,6 @@ xfs_imap_lookup(
1217 struct xfs_inobt_rec_incore rec; 1213 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur; 1214 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp; 1215 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error; 1216 int error;
1222 int i; 1217 int i;
1223 1218
@@ -1231,13 +1226,13 @@ xfs_imap_lookup(
1231 } 1226 }
1232 1227
1233 /* 1228 /*
1234 * derive and lookup the exact inode record for the given agino. If the 1229 * Lookup the inode record for the given agino. If the record cannot be
1235 * record cannot be found, then it's an invalid inode number and we 1230 * found, then it's an invalid inode number and we should abort. Once
1236 * should abort. 1231 * we have a record, we need to ensure it contains the inode number
1232 * we are looking up.
1237 */ 1233 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1234 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); 1235 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) { 1236 if (!error) {
1242 if (i) 1237 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i); 1238 error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1250,6 +1245,11 @@ xfs_imap_lookup(
1250 if (error) 1245 if (error)
1251 return error; 1246 return error;
1252 1247
1248 /* check that the returned record contains the required inode */
1249 if (rec.ir_startino > agino ||
1250 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
1251 return EINVAL;
1252
1253 /* for untrusted inodes check it is allocated first */ 1253 /* for untrusted inodes check it is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) && 1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c282a9af5393..16921f55c542 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
@@ -187,38 +183,6 @@ xfs_inobt_key_diff(
187 cur->bc_rec.i.ir_startino; 183 cur->bc_rec.i.ir_startino;
188} 184}
189 185
190STATIC int
191xfs_inobt_kill_root(
192 struct xfs_btree_cur *cur,
193 struct xfs_buf *bp,
194 int level,
195 union xfs_btree_ptr *newroot)
196{
197 int error;
198
199 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
200 XFS_BTREE_STATS_INC(cur, killroot);
201
202 /*
203 * Update the root pointer, decreasing the level by 1 and then
204 * free the old root.
205 */
206 xfs_inobt_set_root(cur, newroot, -1);
207 error = xfs_inobt_free_block(cur, bp);
208 if (error) {
209 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
210 return error;
211 }
212
213 XFS_BTREE_STATS_INC(cur, free);
214
215 cur->bc_bufs[level] = NULL;
216 cur->bc_nlevels--;
217
218 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
219 return 0;
220}
221
222#ifdef DEBUG 186#ifdef DEBUG
223STATIC int 187STATIC int
224xfs_inobt_keys_inorder( 188xfs_inobt_keys_inorder(
@@ -313,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
313 277
314 .dup_cursor = xfs_inobt_dup_cursor, 278 .dup_cursor = xfs_inobt_dup_cursor,
315 .set_root = xfs_inobt_set_root, 279 .set_root = xfs_inobt_set_root,
316 .kill_root = xfs_inobt_kill_root,
317 .alloc_block = xfs_inobt_alloc_block, 280 .alloc_block = xfs_inobt_alloc_block,
318 .free_block = xfs_inobt_free_block, 281 .free_block = xfs_inobt_free_block,
319 .get_minrecs = xfs_inobt_get_minrecs, 282 .get_minrecs = xfs_inobt_get_minrecs,
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8f8b91be2c99..0cdd26932d8e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -25,14 +25,10 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_btree.h" 34#include "xfs_btree.h"
@@ -95,7 +91,7 @@ xfs_inode_alloc(
95 return ip; 91 return ip;
96} 92}
97 93
98STATIC void 94void
99xfs_inode_free( 95xfs_inode_free(
100 struct xfs_inode *ip) 96 struct xfs_inode *ip)
101{ 97{
@@ -212,7 +208,7 @@ xfs_iget_cache_hit(
212 ip->i_flags &= ~XFS_INEW; 208 ip->i_flags &= ~XFS_INEW;
213 ip->i_flags |= XFS_IRECLAIMABLE; 209 ip->i_flags |= XFS_IRECLAIMABLE;
214 __xfs_inode_set_reclaim_tag(pag, ip); 210 __xfs_inode_set_reclaim_tag(pag, ip);
215 trace_xfs_iget_reclaim(ip); 211 trace_xfs_iget_reclaim_fail(ip);
216 goto out_error; 212 goto out_error;
217 } 213 }
218 214
@@ -227,6 +223,7 @@ xfs_iget_cache_hit(
227 } else { 223 } else {
228 /* If the VFS inode is being torn down, pause and try again. */ 224 /* If the VFS inode is being torn down, pause and try again. */
229 if (!igrab(inode)) { 225 if (!igrab(inode)) {
226 trace_xfs_iget_skip(ip);
230 error = EAGAIN; 227 error = EAGAIN;
231 goto out_error; 228 goto out_error;
232 } 229 }
@@ -234,6 +231,7 @@ xfs_iget_cache_hit(
234 /* We've got a live one. */ 231 /* We've got a live one. */
235 spin_unlock(&ip->i_flags_lock); 232 spin_unlock(&ip->i_flags_lock);
236 read_unlock(&pag->pag_ici_lock); 233 read_unlock(&pag->pag_ici_lock);
234 trace_xfs_iget_hit(ip);
237 } 235 }
238 236
239 if (lock_flags != 0) 237 if (lock_flags != 0)
@@ -242,7 +240,6 @@ xfs_iget_cache_hit(
242 xfs_iflags_clear(ip, XFS_ISTALE); 240 xfs_iflags_clear(ip, XFS_ISTALE);
243 XFS_STATS_INC(xs_ig_found); 241 XFS_STATS_INC(xs_ig_found);
244 242
245 trace_xfs_iget_found(ip);
246 return 0; 243 return 0;
247 244
248out_error: 245out_error:
@@ -264,7 +261,6 @@ xfs_iget_cache_miss(
264{ 261{
265 struct xfs_inode *ip; 262 struct xfs_inode *ip;
266 int error; 263 int error;
267 unsigned long first_index, mask;
268 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 264 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
269 265
270 ip = xfs_inode_alloc(mp, ino); 266 ip = xfs_inode_alloc(mp, ino);
@@ -275,7 +271,7 @@ xfs_iget_cache_miss(
275 if (error) 271 if (error)
276 goto out_destroy; 272 goto out_destroy;
277 273
278 xfs_itrace_entry(ip); 274 trace_xfs_iget_miss(ip);
279 275
280 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 276 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
281 error = ENOENT; 277 error = ENOENT;
@@ -301,8 +297,6 @@ xfs_iget_cache_miss(
301 BUG(); 297 BUG();
302 } 298 }
303 299
304 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
305 first_index = agino & mask;
306 write_lock(&pag->pag_ici_lock); 300 write_lock(&pag->pag_ici_lock);
307 301
308 /* insert the new inode */ 302 /* insert the new inode */
@@ -321,7 +315,6 @@ xfs_iget_cache_miss(
321 write_unlock(&pag->pag_ici_lock); 315 write_unlock(&pag->pag_ici_lock);
322 radix_tree_preload_end(); 316 radix_tree_preload_end();
323 317
324 trace_xfs_iget_alloc(ip);
325 *ipp = ip; 318 *ipp = ip;
326 return 0; 319 return 0;
327 320
@@ -372,8 +365,8 @@ xfs_iget(
372 xfs_perag_t *pag; 365 xfs_perag_t *pag;
373 xfs_agino_t agino; 366 xfs_agino_t agino;
374 367
375 /* the radix tree exists only in inode capable AGs */ 368 /* reject inode numbers outside existing AGs */
376 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) 369 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
377 return EINVAL; 370 return EINVAL;
378 371
379 /* get the perag structure and ensure that it's inode capable */ 372 /* get the perag structure and ensure that it's inode capable */
@@ -422,97 +415,6 @@ out_error_or_again:
422} 415}
423 416
424/* 417/*
425 * Decrement reference count of an inode structure and unlock it.
426 *
427 * ip -- the inode being released
428 * lock_flags -- this parameter indicates the inode's locks to be
429 * to be released. See the comment on xfs_iunlock() for a list
430 * of valid values.
431 */
432void
433xfs_iput(xfs_inode_t *ip,
434 uint lock_flags)
435{
436 xfs_itrace_entry(ip);
437 xfs_iunlock(ip, lock_flags);
438 IRELE(ip);
439}
440
441/*
442 * Special iput for brand-new inodes that are still locked
443 */
444void
445xfs_iput_new(
446 xfs_inode_t *ip,
447 uint lock_flags)
448{
449 struct inode *inode = VFS_I(ip);
450
451 xfs_itrace_entry(ip);
452
453 if ((ip->i_d.di_mode == 0)) {
454 ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
455 make_bad_inode(inode);
456 }
457 if (inode->i_state & I_NEW)
458 unlock_new_inode(inode);
459 if (lock_flags)
460 xfs_iunlock(ip, lock_flags);
461 IRELE(ip);
462}
463
464/*
465 * This is called free all the memory associated with an inode.
466 * It must free the inode itself and any buffers allocated for
467 * if_extents/if_data and if_broot. It must also free the lock
468 * associated with the inode.
469 *
470 * Note: because we don't initialise everything on reallocation out
471 * of the zone, we must ensure we nullify everything correctly before
472 * freeing the structure.
473 */
474void
475xfs_ireclaim(
476 struct xfs_inode *ip)
477{
478 struct xfs_mount *mp = ip->i_mount;
479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
481
482 XFS_STATS_INC(xs_ig_reclaims);
483
484 /*
485 * Remove the inode from the per-AG radix tree.
486 *
487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
490 */
491 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
492 write_lock(&pag->pag_ici_lock);
493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
495 write_unlock(&pag->pag_ici_lock);
496 xfs_perag_put(pag);
497
498 /*
499 * Here we do an (almost) spurious inode lock in order to coordinate
500 * with inode cache radix tree lookups. This is because the lookup
501 * can reference the inodes in the cache without taking references.
502 *
503 * We make that OK here by ensuring that we wait until the inode is
504 * unlocked after the lookup before we go ahead and free it. We get
505 * both the ilock and the iolock because the code may need to drop the
506 * ilock one but will still hold the iolock.
507 */
508 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
509 xfs_qm_dqdetach(ip);
510 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
511
512 xfs_inode_free(ip);
513}
514
515/*
516 * This is a wrapper routine around the xfs_ilock() routine 418 * This is a wrapper routine around the xfs_ilock() routine
517 * used to centralize some grungy code. It is used in places 419 * used to centralize some grungy code. It is used in places
518 * that wish to lock the inode solely for reading the extents. 420 * that wish to lock the inode solely for reading the extents.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b76a829d7e20..108c7a085f94 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,13 +27,10 @@
27#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
28#include "xfs_sb.h" 28#include "xfs_sb.h"
29#include "xfs_ag.h" 29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h" 30#include "xfs_mount.h"
33#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
34#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_dir2_sf.h"
37#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
38#include "xfs_dinode.h" 35#include "xfs_dinode.h"
39#include "xfs_inode.h" 36#include "xfs_inode.h"
@@ -44,7 +41,6 @@
44#include "xfs_alloc.h" 41#include "xfs_alloc.h"
45#include "xfs_ialloc.h" 42#include "xfs_ialloc.h"
46#include "xfs_bmap.h" 43#include "xfs_bmap.h"
47#include "xfs_rw.h"
48#include "xfs_error.h" 44#include "xfs_error.h"
49#include "xfs_utils.h" 45#include "xfs_utils.h"
50#include "xfs_quota.h" 46#include "xfs_quota.h"
@@ -426,7 +422,7 @@ xfs_iformat(
426 if (!XFS_DFORK_Q(dip)) 422 if (!XFS_DFORK_Q(dip))
427 return 0; 423 return 0;
428 ASSERT(ip->i_afp == NULL); 424 ASSERT(ip->i_afp == NULL);
429 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 425 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
430 ip->i_afp->if_ext_max = 426 ip->i_afp->if_ext_max =
431 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 427 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
432 switch (dip->di_aformat) { 428 switch (dip->di_aformat) {
@@ -509,7 +505,7 @@ xfs_iformat_local(
509 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 505 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
510 else { 506 else {
511 real_size = roundup(size, 4); 507 real_size = roundup(size, 4);
512 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 508 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
513 } 509 }
514 ifp->if_bytes = size; 510 ifp->if_bytes = size;
515 ifp->if_real_bytes = real_size; 511 ifp->if_real_bytes = real_size;
@@ -636,7 +632,7 @@ xfs_iformat_btree(
636 } 632 }
637 633
638 ifp->if_broot_bytes = size; 634 ifp->if_broot_bytes = size;
639 ifp->if_broot = kmem_alloc(size, KM_SLEEP); 635 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
640 ASSERT(ifp->if_broot != NULL); 636 ASSERT(ifp->if_broot != NULL);
641 /* 637 /*
642 * Copy and convert from the on-disk structure 638 * Copy and convert from the on-disk structure
@@ -664,7 +660,8 @@ xfs_dinode_from_disk(
664 to->di_uid = be32_to_cpu(from->di_uid); 660 to->di_uid = be32_to_cpu(from->di_uid);
665 to->di_gid = be32_to_cpu(from->di_gid); 661 to->di_gid = be32_to_cpu(from->di_gid);
666 to->di_nlink = be32_to_cpu(from->di_nlink); 662 to->di_nlink = be32_to_cpu(from->di_nlink);
667 to->di_projid = be16_to_cpu(from->di_projid); 663 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
664 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
668 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 665 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
669 to->di_flushiter = be16_to_cpu(from->di_flushiter); 666 to->di_flushiter = be16_to_cpu(from->di_flushiter);
670 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); 667 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
@@ -699,7 +696,8 @@ xfs_dinode_to_disk(
699 to->di_uid = cpu_to_be32(from->di_uid); 696 to->di_uid = cpu_to_be32(from->di_uid);
700 to->di_gid = cpu_to_be32(from->di_gid); 697 to->di_gid = cpu_to_be32(from->di_gid);
701 to->di_nlink = cpu_to_be32(from->di_nlink); 698 to->di_nlink = cpu_to_be32(from->di_nlink);
702 to->di_projid = cpu_to_be16(from->di_projid); 699 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
700 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
703 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 701 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
704 to->di_flushiter = cpu_to_be16(from->di_flushiter); 702 to->di_flushiter = cpu_to_be16(from->di_flushiter);
705 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 703 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
@@ -878,7 +876,7 @@ xfs_iread(
878 if (ip->i_d.di_version == 1) { 876 if (ip->i_d.di_version == 1) {
879 ip->i_d.di_nlink = ip->i_d.di_onlink; 877 ip->i_d.di_nlink = ip->i_d.di_onlink;
880 ip->i_d.di_onlink = 0; 878 ip->i_d.di_onlink = 0;
881 ip->i_d.di_projid = 0; 879 xfs_set_projid(ip, 0);
882 } 880 }
883 881
884 ip->i_delayed_blks = 0; 882 ip->i_delayed_blks = 0;
@@ -922,7 +920,6 @@ xfs_iread_extents(
922 int error; 920 int error;
923 xfs_ifork_t *ifp; 921 xfs_ifork_t *ifp;
924 xfs_extnum_t nextents; 922 xfs_extnum_t nextents;
925 size_t size;
926 923
927 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 924 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
928 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 925 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
@@ -930,7 +927,6 @@ xfs_iread_extents(
930 return XFS_ERROR(EFSCORRUPTED); 927 return XFS_ERROR(EFSCORRUPTED);
931 } 928 }
932 nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 929 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
933 size = nextents * sizeof(xfs_bmbt_rec_t);
934 ifp = XFS_IFORK_PTR(ip, whichfork); 930 ifp = XFS_IFORK_PTR(ip, whichfork);
935 931
936 /* 932 /*
@@ -988,8 +984,7 @@ xfs_ialloc(
988 mode_t mode, 984 mode_t mode,
989 xfs_nlink_t nlink, 985 xfs_nlink_t nlink,
990 xfs_dev_t rdev, 986 xfs_dev_t rdev,
991 cred_t *cr, 987 prid_t prid,
992 xfs_prid_t prid,
993 int okalloc, 988 int okalloc,
994 xfs_buf_t **ialloc_context, 989 xfs_buf_t **ialloc_context,
995 boolean_t *call_again, 990 boolean_t *call_again,
@@ -1033,7 +1028,7 @@ xfs_ialloc(
1033 ASSERT(ip->i_d.di_nlink == nlink); 1028 ASSERT(ip->i_d.di_nlink == nlink);
1034 ip->i_d.di_uid = current_fsuid(); 1029 ip->i_d.di_uid = current_fsuid();
1035 ip->i_d.di_gid = current_fsgid(); 1030 ip->i_d.di_gid = current_fsgid();
1036 ip->i_d.di_projid = prid; 1031 xfs_set_projid(ip, prid);
1037 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1032 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
1038 1033
1039 /* 1034 /*
@@ -1226,7 +1221,7 @@ xfs_isize_check(
1226 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 1221 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
1227 map_first), 1222 map_first),
1228 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1223 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
1229 NULL, NULL)) 1224 NULL))
1230 return; 1225 return;
1231 ASSERT(nimaps == 1); 1226 ASSERT(nimaps == 1);
1232 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1227 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1460,7 +1455,7 @@ xfs_itruncate_finish(
1460 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1455 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1461 ASSERT(ip->i_transp == *tp); 1456 ASSERT(ip->i_transp == *tp);
1462 ASSERT(ip->i_itemp != NULL); 1457 ASSERT(ip->i_itemp != NULL);
1463 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1458 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1464 1459
1465 1460
1466 ntp = *tp; 1461 ntp = *tp;
@@ -1589,11 +1584,10 @@ xfs_itruncate_finish(
1589 xfs_bmap_init(&free_list, &first_block); 1584 xfs_bmap_init(&free_list, &first_block);
1590 error = xfs_bunmapi(ntp, ip, 1585 error = xfs_bunmapi(ntp, ip,
1591 first_unmap_block, unmap_len, 1586 first_unmap_block, unmap_len,
1592 xfs_bmapi_aflag(fork) | 1587 xfs_bmapi_aflag(fork),
1593 (sync ? 0 : XFS_BMAPI_ASYNC),
1594 XFS_ITRUNC_MAX_EXTENTS, 1588 XFS_ITRUNC_MAX_EXTENTS,
1595 &first_block, &free_list, 1589 &first_block, &free_list,
1596 NULL, &done); 1590 &done);
1597 if (error) { 1591 if (error) {
1598 /* 1592 /*
1599 * If the bunmapi call encounters an error, 1593 * If the bunmapi call encounters an error,
@@ -1612,12 +1606,8 @@ xfs_itruncate_finish(
1612 */ 1606 */
1613 error = xfs_bmap_finish(tp, &free_list, &committed); 1607 error = xfs_bmap_finish(tp, &free_list, &committed);
1614 ntp = *tp; 1608 ntp = *tp;
1615 if (committed) { 1609 if (committed)
1616 /* link the inode into the next xact in the chain */ 1610 xfs_trans_ijoin(ntp, ip);
1617 xfs_trans_ijoin(ntp, ip,
1618 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1619 xfs_trans_ihold(ntp, ip);
1620 }
1621 1611
1622 if (error) { 1612 if (error) {
1623 /* 1613 /*
@@ -1646,9 +1636,7 @@ xfs_itruncate_finish(
1646 error = xfs_trans_commit(*tp, 0); 1636 error = xfs_trans_commit(*tp, 0);
1647 *tp = ntp; 1637 *tp = ntp;
1648 1638
1649 /* link the inode into the next transaction in the chain */ 1639 xfs_trans_ijoin(ntp, ip);
1650 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1651 xfs_trans_ihold(ntp, ip);
1652 1640
1653 if (error) 1641 if (error)
1654 return error; 1642 return error;
@@ -1927,6 +1915,11 @@ xfs_iunlink_remove(
1927 return 0; 1915 return 0;
1928} 1916}
1929 1917
1918/*
1919 * A big issue when freeing the inode cluster is is that we _cannot_ skip any
1920 * inodes that are in memory - they all must be marked stale and attached to
1921 * the cluster buffer.
1922 */
1930STATIC void 1923STATIC void
1931xfs_ifree_cluster( 1924xfs_ifree_cluster(
1932 xfs_inode_t *free_ip, 1925 xfs_inode_t *free_ip,
@@ -1958,8 +1951,6 @@ xfs_ifree_cluster(
1958 } 1951 }
1959 1952
1960 for (j = 0; j < nbufs; j++, inum += ninodes) { 1953 for (j = 0; j < nbufs; j++, inum += ninodes) {
1961 int found = 0;
1962
1963 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1954 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1964 XFS_INO_TO_AGBNO(mp, inum)); 1955 XFS_INO_TO_AGBNO(mp, inum));
1965 1956
@@ -1978,23 +1969,25 @@ xfs_ifree_cluster(
1978 /* 1969 /*
1979 * Walk the inodes already attached to the buffer and mark them 1970 * Walk the inodes already attached to the buffer and mark them
1980 * stale. These will all have the flush locks held, so an 1971 * stale. These will all have the flush locks held, so an
1981 * in-memory inode walk can't lock them. 1972 * in-memory inode walk can't lock them. By marking them all
1973 * stale first, we will not attempt to lock them in the loop
1974 * below as the XFS_ISTALE flag will be set.
1982 */ 1975 */
1983 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1976 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1984 while (lip) { 1977 while (lip) {
1985 if (lip->li_type == XFS_LI_INODE) { 1978 if (lip->li_type == XFS_LI_INODE) {
1986 iip = (xfs_inode_log_item_t *)lip; 1979 iip = (xfs_inode_log_item_t *)lip;
1987 ASSERT(iip->ili_logged == 1); 1980 ASSERT(iip->ili_logged == 1);
1988 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 1981 lip->li_cb = xfs_istale_done;
1989 xfs_trans_ail_copy_lsn(mp->m_ail, 1982 xfs_trans_ail_copy_lsn(mp->m_ail,
1990 &iip->ili_flush_lsn, 1983 &iip->ili_flush_lsn,
1991 &iip->ili_item.li_lsn); 1984 &iip->ili_item.li_lsn);
1992 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 1985 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1993 found++;
1994 } 1986 }
1995 lip = lip->li_bio_list; 1987 lip = lip->li_bio_list;
1996 } 1988 }
1997 1989
1990
1998 /* 1991 /*
1999 * For each inode in memory attempt to add it to the inode 1992 * For each inode in memory attempt to add it to the inode
2000 * buffer and set it up for being staled on buffer IO 1993 * buffer and set it up for being staled on buffer IO
@@ -2006,6 +1999,7 @@ xfs_ifree_cluster(
2006 * even trying to lock them. 1999 * even trying to lock them.
2007 */ 2000 */
2008 for (i = 0; i < ninodes; i++) { 2001 for (i = 0; i < ninodes; i++) {
2002retry:
2009 read_lock(&pag->pag_ici_lock); 2003 read_lock(&pag->pag_ici_lock);
2010 ip = radix_tree_lookup(&pag->pag_ici_root, 2004 ip = radix_tree_lookup(&pag->pag_ici_root,
2011 XFS_INO_TO_AGINO(mp, (inum + i))); 2005 XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2016,38 +2010,36 @@ xfs_ifree_cluster(
2016 continue; 2010 continue;
2017 } 2011 }
2018 2012
2019 /* don't try to lock/unlock the current inode */ 2013 /*
2014 * Don't try to lock/unlock the current inode, but we
2015 * _cannot_ skip the other inodes that we did not find
2016 * in the list attached to the buffer and are not
2017 * already marked stale. If we can't lock it, back off
2018 * and retry.
2019 */
2020 if (ip != free_ip && 2020 if (ip != free_ip &&
2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2022 read_unlock(&pag->pag_ici_lock); 2022 read_unlock(&pag->pag_ici_lock);
2023 continue; 2023 delay(1);
2024 goto retry;
2024 } 2025 }
2025 read_unlock(&pag->pag_ici_lock); 2026 read_unlock(&pag->pag_ici_lock);
2026 2027
2027 if (!xfs_iflock_nowait(ip)) { 2028 xfs_iflock(ip);
2028 if (ip != free_ip)
2029 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2030 continue;
2031 }
2032
2033 xfs_iflags_set(ip, XFS_ISTALE); 2029 xfs_iflags_set(ip, XFS_ISTALE);
2034 if (xfs_inode_clean(ip)) {
2035 ASSERT(ip != free_ip);
2036 xfs_ifunlock(ip);
2037 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2038 continue;
2039 }
2040 2030
2031 /*
2032 * we don't need to attach clean inodes or those only
2033 * with unlogged changes (which we throw away, anyway).
2034 */
2041 iip = ip->i_itemp; 2035 iip = ip->i_itemp;
2042 if (!iip) { 2036 if (!iip || xfs_inode_clean(ip)) {
2043 /* inode with unlogged changes only */
2044 ASSERT(ip != free_ip); 2037 ASSERT(ip != free_ip);
2045 ip->i_update_core = 0; 2038 ip->i_update_core = 0;
2046 xfs_ifunlock(ip); 2039 xfs_ifunlock(ip);
2047 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2040 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2048 continue; 2041 continue;
2049 } 2042 }
2050 found++;
2051 2043
2052 iip->ili_last_fields = iip->ili_format.ilf_fields; 2044 iip->ili_last_fields = iip->ili_format.ilf_fields;
2053 iip->ili_format.ilf_fields = 0; 2045 iip->ili_format.ilf_fields = 0;
@@ -2055,16 +2047,14 @@ xfs_ifree_cluster(
2055 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2047 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2056 &iip->ili_item.li_lsn); 2048 &iip->ili_item.li_lsn);
2057 2049
2058 xfs_buf_attach_iodone(bp, 2050 xfs_buf_attach_iodone(bp, xfs_istale_done,
2059 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2051 &iip->ili_item);
2060 xfs_istale_done, (xfs_log_item_t *)iip);
2061 2052
2062 if (ip != free_ip) 2053 if (ip != free_ip)
2063 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2054 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2064 } 2055 }
2065 2056
2066 if (found) 2057 xfs_trans_stale_inode_buf(tp, bp);
2067 xfs_trans_stale_inode_buf(tp, bp);
2068 xfs_trans_binval(tp, bp); 2058 xfs_trans_binval(tp, bp);
2069 } 2059 }
2070 2060
@@ -2203,7 +2193,7 @@ xfs_iroot_realloc(
2203 */ 2193 */
2204 if (ifp->if_broot_bytes == 0) { 2194 if (ifp->if_broot_bytes == 0) {
2205 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2195 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
2206 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); 2196 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2207 ifp->if_broot_bytes = (int)new_size; 2197 ifp->if_broot_bytes = (int)new_size;
2208 return; 2198 return;
2209 } 2199 }
@@ -2219,7 +2209,7 @@ xfs_iroot_realloc(
2219 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2209 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
2220 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 2210 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2221 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2211 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
2222 KM_SLEEP); 2212 KM_SLEEP | KM_NOFS);
2223 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2213 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2224 ifp->if_broot_bytes); 2214 ifp->if_broot_bytes);
2225 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2215 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -2245,7 +2235,7 @@ xfs_iroot_realloc(
2245 else 2235 else
2246 new_size = 0; 2236 new_size = 0;
2247 if (new_size > 0) { 2237 if (new_size > 0) {
2248 new_broot = kmem_alloc(new_size, KM_SLEEP); 2238 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2249 /* 2239 /*
2250 * First copy over the btree block header. 2240 * First copy over the btree block header.
2251 */ 2241 */
@@ -2349,7 +2339,8 @@ xfs_idata_realloc(
2349 real_size = roundup(new_size, 4); 2339 real_size = roundup(new_size, 4);
2350 if (ifp->if_u1.if_data == NULL) { 2340 if (ifp->if_u1.if_data == NULL) {
2351 ASSERT(ifp->if_real_bytes == 0); 2341 ASSERT(ifp->if_real_bytes == 0);
2352 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2342 ifp->if_u1.if_data = kmem_alloc(real_size,
2343 KM_SLEEP | KM_NOFS);
2353 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2344 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2354 /* 2345 /*
2355 * Only do the realloc if the underlying size 2346 * Only do the realloc if the underlying size
@@ -2360,11 +2351,12 @@ xfs_idata_realloc(
2360 kmem_realloc(ifp->if_u1.if_data, 2351 kmem_realloc(ifp->if_u1.if_data,
2361 real_size, 2352 real_size,
2362 ifp->if_real_bytes, 2353 ifp->if_real_bytes,
2363 KM_SLEEP); 2354 KM_SLEEP | KM_NOFS);
2364 } 2355 }
2365 } else { 2356 } else {
2366 ASSERT(ifp->if_real_bytes == 0); 2357 ASSERT(ifp->if_real_bytes == 0);
2367 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2358 ifp->if_u1.if_data = kmem_alloc(real_size,
2359 KM_SLEEP | KM_NOFS);
2368 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 2360 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
2369 ifp->if_bytes); 2361 ifp->if_bytes);
2370 } 2362 }
@@ -2731,11 +2723,10 @@ cluster_corrupt_out:
2731 * mark it as stale and brelse. 2723 * mark it as stale and brelse.
2732 */ 2724 */
2733 if (XFS_BUF_IODONE_FUNC(bp)) { 2725 if (XFS_BUF_IODONE_FUNC(bp)) {
2734 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
2735 XFS_BUF_UNDONE(bp); 2726 XFS_BUF_UNDONE(bp);
2736 XFS_BUF_STALE(bp); 2727 XFS_BUF_STALE(bp);
2737 XFS_BUF_ERROR(bp,EIO); 2728 XFS_BUF_ERROR(bp,EIO);
2738 xfs_biodone(bp); 2729 xfs_buf_ioend(bp, 0);
2739 } else { 2730 } else {
2740 XFS_BUF_STALE(bp); 2731 XFS_BUF_STALE(bp);
2741 xfs_buf_relse(bp); 2732 xfs_buf_relse(bp);
@@ -3018,7 +3009,7 @@ xfs_iflush_int(
3018 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3009 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
3019 memset(&(dip->di_pad[0]), 0, 3010 memset(&(dip->di_pad[0]), 0,
3020 sizeof(dip->di_pad)); 3011 sizeof(dip->di_pad));
3021 ASSERT(ip->i_d.di_projid == 0); 3012 ASSERT(xfs_get_projid(ip) == 0);
3022 } 3013 }
3023 } 3014 }
3024 3015
@@ -3069,8 +3060,7 @@ xfs_iflush_int(
3069 * and unlock the inode's flush lock when the inode is 3060 * and unlock the inode's flush lock when the inode is
3070 * completely written to disk. 3061 * completely written to disk.
3071 */ 3062 */
3072 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3063 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
3073 xfs_iflush_done, (xfs_log_item_t *)iip);
3074 3064
3075 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3065 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
3076 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3066 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
@@ -3514,13 +3504,11 @@ xfs_iext_remove_indirect(
3514 xfs_extnum_t ext_diff; /* extents to remove in current list */ 3504 xfs_extnum_t ext_diff; /* extents to remove in current list */
3515 xfs_extnum_t nex1; /* number of extents before idx */ 3505 xfs_extnum_t nex1; /* number of extents before idx */
3516 xfs_extnum_t nex2; /* extents after idx + count */ 3506 xfs_extnum_t nex2; /* extents after idx + count */
3517 int nlists; /* entries in indirection array */
3518 int page_idx = idx; /* index in target extent list */ 3507 int page_idx = idx; /* index in target extent list */
3519 3508
3520 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3509 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3521 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3510 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3522 ASSERT(erp != NULL); 3511 ASSERT(erp != NULL);
3523 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3524 nex1 = page_idx; 3512 nex1 = page_idx;
3525 ext_cnt = count; 3513 ext_cnt = count;
3526 while (ext_cnt) { 3514 while (ext_cnt) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 78550df13cd6..fb2ca2e4cdc9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -134,8 +134,9 @@ typedef struct xfs_icdinode {
134 __uint32_t di_uid; /* owner's user id */ 134 __uint32_t di_uid; /* owner's user id */
135 __uint32_t di_gid; /* owner's group id */ 135 __uint32_t di_gid; /* owner's group id */
136 __uint32_t di_nlink; /* number of links to file */ 136 __uint32_t di_nlink; /* number of links to file */
137 __uint16_t di_projid; /* owner's project id */ 137 __uint16_t di_projid_lo; /* lower part of owner's project id */
138 __uint8_t di_pad[8]; /* unused, zeroed space */ 138 __uint16_t di_projid_hi; /* higher part of owner's project id */
139 __uint8_t di_pad[6]; /* unused, zeroed space */
139 __uint16_t di_flushiter; /* incremented on flush */ 140 __uint16_t di_flushiter; /* incremented on flush */
140 xfs_ictimestamp_t di_atime; /* time last accessed */ 141 xfs_ictimestamp_t di_atime; /* time last accessed */
141 xfs_ictimestamp_t di_mtime; /* time last modified */ 142 xfs_ictimestamp_t di_mtime; /* time last modified */
@@ -212,7 +213,6 @@ typedef struct xfs_icdinode {
212#ifdef __KERNEL__ 213#ifdef __KERNEL__
213 214
214struct bhv_desc; 215struct bhv_desc;
215struct cred;
216struct xfs_buf; 216struct xfs_buf;
217struct xfs_bmap_free; 217struct xfs_bmap_free;
218struct xfs_bmbt_irec; 218struct xfs_bmbt_irec;
@@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
335} 335}
336 336
337/* 337/*
338 * Project quota id helpers (previously projid was 16bit only
339 * and using two 16bit values to hold new 32bit projid was choosen
340 * to retain compatibility with "old" filesystems).
341 */
342static inline prid_t
343xfs_get_projid(struct xfs_inode *ip)
344{
345 return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
346}
347
348static inline void
349xfs_set_projid(struct xfs_inode *ip,
350 prid_t projid)
351{
352 ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
353 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
354}
355
356/*
338 * Manage the i_flush queue embedded in the inode. This completion 357 * Manage the i_flush queue embedded in the inode. This completion
339 * queue synchronizes processes attempting to flush the in-core 358 * queue synchronizes processes attempting to flush the in-core
340 * inode back to disk. 359 * inode back to disk.
@@ -443,8 +462,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
443 */ 462 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 463int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **); 464 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 465void xfs_ilock(xfs_inode_t *, uint);
449int xfs_ilock_nowait(xfs_inode_t *, uint); 466int xfs_ilock_nowait(xfs_inode_t *, uint);
450void xfs_iunlock(xfs_inode_t *, uint); 467void xfs_iunlock(xfs_inode_t *, uint);
@@ -452,14 +469,14 @@ void xfs_ilock_demote(xfs_inode_t *, uint);
452int xfs_isilocked(xfs_inode_t *, uint); 469int xfs_isilocked(xfs_inode_t *, uint);
453uint xfs_ilock_map_shared(xfs_inode_t *); 470uint xfs_ilock_map_shared(xfs_inode_t *);
454void xfs_iunlock_map_shared(xfs_inode_t *, uint); 471void xfs_iunlock_map_shared(xfs_inode_t *, uint);
455void xfs_ireclaim(xfs_inode_t *); 472void xfs_inode_free(struct xfs_inode *ip);
456 473
457/* 474/*
458 * xfs_inode.c prototypes. 475 * xfs_inode.c prototypes.
459 */ 476 */
460int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 477int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
461 xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, 478 xfs_nlink_t, xfs_dev_t, prid_t, int,
462 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 479 struct xfs_buf **, boolean_t *, xfs_inode_t **);
463 480
464uint xfs_ip2xflags(struct xfs_inode *); 481uint xfs_ip2xflags(struct xfs_inode *);
465uint xfs_dic2xflags(struct xfs_dinode *); 482uint xfs_dic2xflags(struct xfs_dinode *);
@@ -473,7 +490,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
473void xfs_iext_realloc(xfs_inode_t *, int, int); 490void xfs_iext_realloc(xfs_inode_t *, int, int);
474void xfs_iunpin_wait(xfs_inode_t *); 491void xfs_iunpin_wait(xfs_inode_t *);
475int xfs_iflush(xfs_inode_t *, uint); 492int xfs_iflush(xfs_inode_t *, uint);
476void xfs_ichgtime(xfs_inode_t *, int);
477void xfs_lock_inodes(xfs_inode_t **, int, uint); 493void xfs_lock_inodes(xfs_inode_t **, int, uint);
478void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 494void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
479 495
@@ -484,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
484#define IHOLD(ip) \ 500#define IHOLD(ip) \
485do { \ 501do { \
486 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
487 atomic_inc(&(VFS_I(ip)->i_count)); \ 503 ihold(VFS_I(ip)); \
488 trace_xfs_ihold(ip, _THIS_IP_); \ 504 trace_xfs_ihold(ip, _THIS_IP_); \
489} while (0) 505} while (0)
490 506
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index cf8249a60004..c7ac020705df 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -22,30 +22,26 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_buf_item.h"
26#include "xfs_sb.h" 25#include "xfs_sb.h"
27#include "xfs_ag.h" 26#include "xfs_ag.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 27#include "xfs_mount.h"
31#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
32#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 30#include "xfs_dinode.h"
38#include "xfs_inode.h" 31#include "xfs_inode.h"
39#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
40#include "xfs_btree.h"
41#include "xfs_ialloc.h"
42#include "xfs_rw.h"
43#include "xfs_error.h" 33#include "xfs_error.h"
44#include "xfs_trace.h" 34#include "xfs_trace.h"
45 35
46 36
47kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 37kmem_zone_t *xfs_ili_zone; /* inode log item zone */
48 38
39static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
40{
41 return container_of(lip, struct xfs_inode_log_item, ili_item);
42}
43
44
49/* 45/*
50 * This returns the number of iovecs needed to log the given inode item. 46 * This returns the number of iovecs needed to log the given inode item.
51 * 47 *
@@ -55,13 +51,11 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */
55 */ 51 */
56STATIC uint 52STATIC uint
57xfs_inode_item_size( 53xfs_inode_item_size(
58 xfs_inode_log_item_t *iip) 54 struct xfs_log_item *lip)
59{ 55{
60 uint nvecs; 56 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
61 xfs_inode_t *ip; 57 struct xfs_inode *ip = iip->ili_inode;
62 58 uint nvecs = 2;
63 ip = iip->ili_inode;
64 nvecs = 2;
65 59
66 /* 60 /*
67 * Only log the data/extents/b-tree root if there is something 61 * Only log the data/extents/b-tree root if there is something
@@ -212,36 +206,23 @@ xfs_inode_item_size(
212 */ 206 */
213STATIC void 207STATIC void
214xfs_inode_item_format( 208xfs_inode_item_format(
215 xfs_inode_log_item_t *iip, 209 struct xfs_log_item *lip,
216 xfs_log_iovec_t *log_vector) 210 struct xfs_log_iovec *vecp)
217{ 211{
212 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
213 struct xfs_inode *ip = iip->ili_inode;
218 uint nvecs; 214 uint nvecs;
219 xfs_log_iovec_t *vecp;
220 xfs_inode_t *ip;
221 size_t data_bytes; 215 size_t data_bytes;
222 xfs_bmbt_rec_t *ext_buffer; 216 xfs_bmbt_rec_t *ext_buffer;
223 int nrecs;
224 xfs_mount_t *mp; 217 xfs_mount_t *mp;
225 218
226 ip = iip->ili_inode; 219 vecp->i_addr = &iip->ili_format;
227 vecp = log_vector;
228
229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
230 vecp->i_len = sizeof(xfs_inode_log_format_t); 220 vecp->i_len = sizeof(xfs_inode_log_format_t);
231 vecp->i_type = XLOG_REG_TYPE_IFORMAT; 221 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
232 vecp++; 222 vecp++;
233 nvecs = 1; 223 nvecs = 1;
234 224
235 /* 225 /*
236 * Make sure the linux inode is dirty. We do this before
237 * clearing i_update_core as the VFS will call back into
238 * XFS here and set i_update_core, so we need to dirty the
239 * inode first so that the ordering of i_update_core and
240 * unlogged modifications still works as described below.
241 */
242 xfs_mark_inode_dirty_sync(ip);
243
244 /*
245 * Clear i_update_core if the timestamps (or any other 226 * Clear i_update_core if the timestamps (or any other
246 * non-transactional modification) need flushing/logging 227 * non-transactional modification) need flushing/logging
247 * and we're about to log them with the rest of the core. 228 * and we're about to log them with the rest of the core.
@@ -277,7 +258,7 @@ xfs_inode_item_format(
277 */ 258 */
278 xfs_synchronize_times(ip); 259 xfs_synchronize_times(ip);
279 260
280 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 261 vecp->i_addr = &ip->i_d;
281 vecp->i_len = sizeof(struct xfs_icdinode); 262 vecp->i_len = sizeof(struct xfs_icdinode);
282 vecp->i_type = XLOG_REG_TYPE_ICORE; 263 vecp->i_type = XLOG_REG_TYPE_ICORE;
283 vecp++; 264 vecp++;
@@ -323,18 +304,17 @@ xfs_inode_item_format(
323 ASSERT(ip->i_df.if_u1.if_extents != NULL); 304 ASSERT(ip->i_df.if_u1.if_extents != NULL);
324 ASSERT(ip->i_d.di_nextents > 0); 305 ASSERT(ip->i_d.di_nextents > 0);
325 ASSERT(iip->ili_extents_buf == NULL); 306 ASSERT(iip->ili_extents_buf == NULL);
326 nrecs = ip->i_df.if_bytes / 307 ASSERT((ip->i_df.if_bytes /
327 (uint)sizeof(xfs_bmbt_rec_t); 308 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
328 ASSERT(nrecs > 0);
329#ifdef XFS_NATIVE_HOST 309#ifdef XFS_NATIVE_HOST
330 if (nrecs == ip->i_d.di_nextents) { 310 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
311 (uint)sizeof(xfs_bmbt_rec_t)) {
331 /* 312 /*
332 * There are no delayed allocation 313 * There are no delayed allocation
333 * extents, so just point to the 314 * extents, so just point to the
334 * real extents array. 315 * real extents array.
335 */ 316 */
336 vecp->i_addr = 317 vecp->i_addr = ip->i_df.if_u1.if_extents;
337 (char *)(ip->i_df.if_u1.if_extents);
338 vecp->i_len = ip->i_df.if_bytes; 318 vecp->i_len = ip->i_df.if_bytes;
339 vecp->i_type = XLOG_REG_TYPE_IEXT; 319 vecp->i_type = XLOG_REG_TYPE_IEXT;
340 } else 320 } else
@@ -352,7 +332,7 @@ xfs_inode_item_format(
352 ext_buffer = kmem_alloc(ip->i_df.if_bytes, 332 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
353 KM_SLEEP); 333 KM_SLEEP);
354 iip->ili_extents_buf = ext_buffer; 334 iip->ili_extents_buf = ext_buffer;
355 vecp->i_addr = (xfs_caddr_t)ext_buffer; 335 vecp->i_addr = ext_buffer;
356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 336 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
357 XFS_DATA_FORK); 337 XFS_DATA_FORK);
358 vecp->i_type = XLOG_REG_TYPE_IEXT; 338 vecp->i_type = XLOG_REG_TYPE_IEXT;
@@ -371,7 +351,7 @@ xfs_inode_item_format(
371 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 351 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
372 ASSERT(ip->i_df.if_broot_bytes > 0); 352 ASSERT(ip->i_df.if_broot_bytes > 0);
373 ASSERT(ip->i_df.if_broot != NULL); 353 ASSERT(ip->i_df.if_broot != NULL);
374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 354 vecp->i_addr = ip->i_df.if_broot;
375 vecp->i_len = ip->i_df.if_broot_bytes; 355 vecp->i_len = ip->i_df.if_broot_bytes;
376 vecp->i_type = XLOG_REG_TYPE_IBROOT; 356 vecp->i_type = XLOG_REG_TYPE_IBROOT;
377 vecp++; 357 vecp++;
@@ -389,7 +369,7 @@ xfs_inode_item_format(
389 ASSERT(ip->i_df.if_u1.if_data != NULL); 369 ASSERT(ip->i_df.if_u1.if_data != NULL);
390 ASSERT(ip->i_d.di_size > 0); 370 ASSERT(ip->i_d.di_size > 0);
391 371
392 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; 372 vecp->i_addr = ip->i_df.if_u1.if_data;
393 /* 373 /*
394 * Round i_bytes up to a word boundary. 374 * Round i_bytes up to a word boundary.
395 * The underlying memory is guaranteed to 375 * The underlying memory is guaranteed to
@@ -437,7 +417,7 @@ xfs_inode_item_format(
437 * Assert that no attribute-related log flags are set. 417 * Assert that no attribute-related log flags are set.
438 */ 418 */
439 if (!XFS_IFORK_Q(ip)) { 419 if (!XFS_IFORK_Q(ip)) {
440 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 420 ASSERT(nvecs == lip->li_desc->lid_size);
441 iip->ili_format.ilf_size = nvecs; 421 iip->ili_format.ilf_size = nvecs;
442 ASSERT(!(iip->ili_format.ilf_fields & 422 ASSERT(!(iip->ili_format.ilf_fields &
443 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 423 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -449,21 +429,21 @@ xfs_inode_item_format(
449 ASSERT(!(iip->ili_format.ilf_fields & 429 ASSERT(!(iip->ili_format.ilf_fields &
450 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 430 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
451 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 431 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
452 ASSERT(ip->i_afp->if_bytes > 0);
453 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
454 ASSERT(ip->i_d.di_anextents > 0);
455#ifdef DEBUG 432#ifdef DEBUG
456 nrecs = ip->i_afp->if_bytes / 433 int nrecs = ip->i_afp->if_bytes /
457 (uint)sizeof(xfs_bmbt_rec_t); 434 (uint)sizeof(xfs_bmbt_rec_t);
458#endif
459 ASSERT(nrecs > 0); 435 ASSERT(nrecs > 0);
460 ASSERT(nrecs == ip->i_d.di_anextents); 436 ASSERT(nrecs == ip->i_d.di_anextents);
437 ASSERT(ip->i_afp->if_bytes > 0);
438 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
439 ASSERT(ip->i_d.di_anextents > 0);
440#endif
461#ifdef XFS_NATIVE_HOST 441#ifdef XFS_NATIVE_HOST
462 /* 442 /*
463 * There are not delayed allocation extents 443 * There are not delayed allocation extents
464 * for attributes, so just point at the array. 444 * for attributes, so just point at the array.
465 */ 445 */
466 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); 446 vecp->i_addr = ip->i_afp->if_u1.if_extents;
467 vecp->i_len = ip->i_afp->if_bytes; 447 vecp->i_len = ip->i_afp->if_bytes;
468#else 448#else
469 ASSERT(iip->ili_aextents_buf == NULL); 449 ASSERT(iip->ili_aextents_buf == NULL);
@@ -473,7 +453,7 @@ xfs_inode_item_format(
473 ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 453 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
474 KM_SLEEP); 454 KM_SLEEP);
475 iip->ili_aextents_buf = ext_buffer; 455 iip->ili_aextents_buf = ext_buffer;
476 vecp->i_addr = (xfs_caddr_t)ext_buffer; 456 vecp->i_addr = ext_buffer;
477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 457 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
478 XFS_ATTR_FORK); 458 XFS_ATTR_FORK);
479#endif 459#endif
@@ -490,7 +470,7 @@ xfs_inode_item_format(
490 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 470 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
491 ASSERT(ip->i_afp->if_broot_bytes > 0); 471 ASSERT(ip->i_afp->if_broot_bytes > 0);
492 ASSERT(ip->i_afp->if_broot != NULL); 472 ASSERT(ip->i_afp->if_broot != NULL);
493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 473 vecp->i_addr = ip->i_afp->if_broot;
494 vecp->i_len = ip->i_afp->if_broot_bytes; 474 vecp->i_len = ip->i_afp->if_broot_bytes;
495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 475 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
496 vecp++; 476 vecp++;
@@ -506,7 +486,7 @@ xfs_inode_item_format(
506 ASSERT(ip->i_afp->if_bytes > 0); 486 ASSERT(ip->i_afp->if_bytes > 0);
507 ASSERT(ip->i_afp->if_u1.if_data != NULL); 487 ASSERT(ip->i_afp->if_u1.if_data != NULL);
508 488
509 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; 489 vecp->i_addr = ip->i_afp->if_u1.if_data;
510 /* 490 /*
511 * Round i_bytes up to a word boundary. 491 * Round i_bytes up to a word boundary.
512 * The underlying memory is guaranteed to 492 * The underlying memory is guaranteed to
@@ -528,7 +508,7 @@ xfs_inode_item_format(
528 break; 508 break;
529 } 509 }
530 510
531 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 511 ASSERT(nvecs == lip->li_desc->lid_size);
532 iip->ili_format.ilf_size = nvecs; 512 iip->ili_format.ilf_size = nvecs;
533} 513}
534 514
@@ -539,12 +519,14 @@ xfs_inode_item_format(
539 */ 519 */
540STATIC void 520STATIC void
541xfs_inode_item_pin( 521xfs_inode_item_pin(
542 xfs_inode_log_item_t *iip) 522 struct xfs_log_item *lip)
543{ 523{
544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 524 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
525
526 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
545 527
546 trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); 528 trace_xfs_inode_pin(ip, _RET_IP_);
547 atomic_inc(&iip->ili_inode->i_pincount); 529 atomic_inc(&ip->i_pincount);
548} 530}
549 531
550 532
@@ -554,12 +536,12 @@ xfs_inode_item_pin(
554 * 536 *
555 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. 537 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
556 */ 538 */
557/* ARGSUSED */
558STATIC void 539STATIC void
559xfs_inode_item_unpin( 540xfs_inode_item_unpin(
560 xfs_inode_log_item_t *iip) 541 struct xfs_log_item *lip,
542 int remove)
561{ 543{
562 struct xfs_inode *ip = iip->ili_inode; 544 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
563 545
564 trace_xfs_inode_unpin(ip, _RET_IP_); 546 trace_xfs_inode_unpin(ip, _RET_IP_);
565 ASSERT(atomic_read(&ip->i_pincount) > 0); 547 ASSERT(atomic_read(&ip->i_pincount) > 0);
@@ -567,15 +549,6 @@ xfs_inode_item_unpin(
567 wake_up(&ip->i_ipin_wait); 549 wake_up(&ip->i_ipin_wait);
568} 550}
569 551
570/* ARGSUSED */
571STATIC void
572xfs_inode_item_unpin_remove(
573 xfs_inode_log_item_t *iip,
574 xfs_trans_t *tp)
575{
576 xfs_inode_item_unpin(iip);
577}
578
579/* 552/*
580 * This is called to attempt to lock the inode associated with this 553 * This is called to attempt to lock the inode associated with this
581 * inode log item, in preparation for the push routine which does the actual 554 * inode log item, in preparation for the push routine which does the actual
@@ -591,19 +564,16 @@ xfs_inode_item_unpin_remove(
591 */ 564 */
592STATIC uint 565STATIC uint
593xfs_inode_item_trylock( 566xfs_inode_item_trylock(
594 xfs_inode_log_item_t *iip) 567 struct xfs_log_item *lip)
595{ 568{
596 register xfs_inode_t *ip; 569 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
597 570 struct xfs_inode *ip = iip->ili_inode;
598 ip = iip->ili_inode;
599 571
600 if (xfs_ipincount(ip) > 0) { 572 if (xfs_ipincount(ip) > 0)
601 return XFS_ITEM_PINNED; 573 return XFS_ITEM_PINNED;
602 }
603 574
604 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 575 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
605 return XFS_ITEM_LOCKED; 576 return XFS_ITEM_LOCKED;
606 }
607 577
608 if (!xfs_iflock_nowait(ip)) { 578 if (!xfs_iflock_nowait(ip)) {
609 /* 579 /*
@@ -629,7 +599,7 @@ xfs_inode_item_trylock(
629 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 599 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
630 ASSERT(iip->ili_format.ilf_fields != 0); 600 ASSERT(iip->ili_format.ilf_fields != 0);
631 ASSERT(iip->ili_logged == 0); 601 ASSERT(iip->ili_logged == 0);
632 ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL); 602 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
633 } 603 }
634#endif 604#endif
635 return XFS_ITEM_SUCCESS; 605 return XFS_ITEM_SUCCESS;
@@ -643,26 +613,18 @@ xfs_inode_item_trylock(
643 */ 613 */
644STATIC void 614STATIC void
645xfs_inode_item_unlock( 615xfs_inode_item_unlock(
646 xfs_inode_log_item_t *iip) 616 struct xfs_log_item *lip)
647{ 617{
648 uint hold; 618 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
649 uint iolocked; 619 struct xfs_inode *ip = iip->ili_inode;
650 uint lock_flags; 620 unsigned short lock_flags;
651 xfs_inode_t *ip;
652 621
653 ASSERT(iip != NULL);
654 ASSERT(iip->ili_inode->i_itemp != NULL); 622 ASSERT(iip->ili_inode->i_itemp != NULL);
655 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 623 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
656 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 624
657 XFS_ILI_IOLOCKED_EXCL)) ||
658 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
659 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
660 XFS_ILI_IOLOCKED_SHARED)) ||
661 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
662 /* 625 /*
663 * Clear the transaction pointer in the inode. 626 * Clear the transaction pointer in the inode.
664 */ 627 */
665 ip = iip->ili_inode;
666 ip->i_transp = NULL; 628 ip->i_transp = NULL;
667 629
668 /* 630 /*
@@ -686,34 +648,11 @@ xfs_inode_item_unlock(
686 iip->ili_aextents_buf = NULL; 648 iip->ili_aextents_buf = NULL;
687 } 649 }
688 650
689 /* 651 lock_flags = iip->ili_lock_flags;
690 * Figure out if we should unlock the inode or not. 652 iip->ili_lock_flags = 0;
691 */ 653 if (lock_flags) {
692 hold = iip->ili_flags & XFS_ILI_HOLD; 654 xfs_iunlock(iip->ili_inode, lock_flags);
693 655 IRELE(iip->ili_inode);
694 /*
695 * Before clearing out the flags, remember whether we
696 * are holding the inode's IO lock.
697 */
698 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;
699
700 /*
701 * Clear out the fields of the inode log item particular
702 * to the current transaction.
703 */
704 iip->ili_flags = 0;
705
706 /*
707 * Unlock the inode if XFS_ILI_HOLD was not set.
708 */
709 if (!hold) {
710 lock_flags = XFS_ILOCK_EXCL;
711 if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
712 lock_flags |= XFS_IOLOCK_EXCL;
713 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
714 lock_flags |= XFS_IOLOCK_SHARED;
715 }
716 xfs_iput(iip->ili_inode, lock_flags);
717 } 656 }
718} 657}
719 658
@@ -725,13 +664,12 @@ xfs_inode_item_unlock(
725 * is the only one that matters. Therefore, simply return the 664 * is the only one that matters. Therefore, simply return the
726 * given lsn. 665 * given lsn.
727 */ 666 */
728/*ARGSUSED*/
729STATIC xfs_lsn_t 667STATIC xfs_lsn_t
730xfs_inode_item_committed( 668xfs_inode_item_committed(
731 xfs_inode_log_item_t *iip, 669 struct xfs_log_item *lip,
732 xfs_lsn_t lsn) 670 xfs_lsn_t lsn)
733{ 671{
734 return (lsn); 672 return lsn;
735} 673}
736 674
737/* 675/*
@@ -743,13 +681,12 @@ xfs_inode_item_committed(
743 */ 681 */
744STATIC void 682STATIC void
745xfs_inode_item_pushbuf( 683xfs_inode_item_pushbuf(
746 xfs_inode_log_item_t *iip) 684 struct xfs_log_item *lip)
747{ 685{
748 xfs_inode_t *ip; 686 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
749 xfs_mount_t *mp; 687 struct xfs_inode *ip = iip->ili_inode;
750 xfs_buf_t *bp; 688 struct xfs_buf *bp;
751 689
752 ip = iip->ili_inode;
753 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 690 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
754 691
755 /* 692 /*
@@ -757,14 +694,13 @@ xfs_inode_item_pushbuf(
757 * inode was taken off the AIL. So, just get out. 694 * inode was taken off the AIL. So, just get out.
758 */ 695 */
759 if (completion_done(&ip->i_flush) || 696 if (completion_done(&ip->i_flush) ||
760 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 697 !(lip->li_flags & XFS_LI_IN_AIL)) {
761 xfs_iunlock(ip, XFS_ILOCK_SHARED); 698 xfs_iunlock(ip, XFS_ILOCK_SHARED);
762 return; 699 return;
763 } 700 }
764 701
765 mp = ip->i_mount; 702 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
766 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 703 iip->ili_format.ilf_len, XBF_TRYLOCK);
767 iip->ili_format.ilf_len, XBF_TRYLOCK);
768 704
769 xfs_iunlock(ip, XFS_ILOCK_SHARED); 705 xfs_iunlock(ip, XFS_ILOCK_SHARED);
770 if (!bp) 706 if (!bp)
@@ -772,10 +708,8 @@ xfs_inode_item_pushbuf(
772 if (XFS_BUF_ISDELAYWRITE(bp)) 708 if (XFS_BUF_ISDELAYWRITE(bp))
773 xfs_buf_delwri_promote(bp); 709 xfs_buf_delwri_promote(bp);
774 xfs_buf_relse(bp); 710 xfs_buf_relse(bp);
775 return;
776} 711}
777 712
778
779/* 713/*
780 * This is called to asynchronously write the inode associated with this 714 * This is called to asynchronously write the inode associated with this
781 * inode log item out to disk. The inode will already have been locked by 715 * inode log item out to disk. The inode will already have been locked by
@@ -783,14 +717,14 @@ xfs_inode_item_pushbuf(
783 */ 717 */
784STATIC void 718STATIC void
785xfs_inode_item_push( 719xfs_inode_item_push(
786 xfs_inode_log_item_t *iip) 720 struct xfs_log_item *lip)
787{ 721{
788 xfs_inode_t *ip; 722 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
789 723 struct xfs_inode *ip = iip->ili_inode;
790 ip = iip->ili_inode;
791 724
792 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
793 ASSERT(!completion_done(&ip->i_flush)); 726 ASSERT(!completion_done(&ip->i_flush));
727
794 /* 728 /*
795 * Since we were able to lock the inode's flush lock and 729 * Since we were able to lock the inode's flush lock and
796 * we found it on the AIL, the inode must be dirty. This 730 * we found it on the AIL, the inode must be dirty. This
@@ -813,43 +747,34 @@ xfs_inode_item_push(
813 */ 747 */
814 (void) xfs_iflush(ip, 0); 748 (void) xfs_iflush(ip, 0);
815 xfs_iunlock(ip, XFS_ILOCK_SHARED); 749 xfs_iunlock(ip, XFS_ILOCK_SHARED);
816
817 return;
818} 750}
819 751
820/* 752/*
821 * XXX rcc - this one really has to do something. Probably needs 753 * XXX rcc - this one really has to do something. Probably needs
822 * to stamp in a new field in the incore inode. 754 * to stamp in a new field in the incore inode.
823 */ 755 */
824/* ARGSUSED */
825STATIC void 756STATIC void
826xfs_inode_item_committing( 757xfs_inode_item_committing(
827 xfs_inode_log_item_t *iip, 758 struct xfs_log_item *lip,
828 xfs_lsn_t lsn) 759 xfs_lsn_t lsn)
829{ 760{
830 iip->ili_last_lsn = lsn; 761 INODE_ITEM(lip)->ili_last_lsn = lsn;
831 return;
832} 762}
833 763
834/* 764/*
835 * This is the ops vector shared by all buf log items. 765 * This is the ops vector shared by all buf log items.
836 */ 766 */
837static struct xfs_item_ops xfs_inode_item_ops = { 767static struct xfs_item_ops xfs_inode_item_ops = {
838 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, 768 .iop_size = xfs_inode_item_size,
839 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 769 .iop_format = xfs_inode_item_format,
840 xfs_inode_item_format, 770 .iop_pin = xfs_inode_item_pin,
841 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 771 .iop_unpin = xfs_inode_item_unpin,
842 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin, 772 .iop_trylock = xfs_inode_item_trylock,
843 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 773 .iop_unlock = xfs_inode_item_unlock,
844 xfs_inode_item_unpin_remove, 774 .iop_committed = xfs_inode_item_committed,
845 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 775 .iop_push = xfs_inode_item_push,
846 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock, 776 .iop_pushbuf = xfs_inode_item_pushbuf,
847 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 777 .iop_committing = xfs_inode_item_committing
848 xfs_inode_item_committed,
849 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
850 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
851 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
852 xfs_inode_item_committing
853}; 778};
854 779
855 780
@@ -858,10 +783,10 @@ static struct xfs_item_ops xfs_inode_item_ops = {
858 */ 783 */
859void 784void
860xfs_inode_item_init( 785xfs_inode_item_init(
861 xfs_inode_t *ip, 786 struct xfs_inode *ip,
862 xfs_mount_t *mp) 787 struct xfs_mount *mp)
863{ 788{
864 xfs_inode_log_item_t *iip; 789 struct xfs_inode_log_item *iip;
865 790
866 ASSERT(ip->i_itemp == NULL); 791 ASSERT(ip->i_itemp == NULL);
867 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 792 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
@@ -899,14 +824,14 @@ xfs_inode_item_destroy(
899 * from the AIL if it has not been re-logged, and unlocking the inode's 824 * from the AIL if it has not been re-logged, and unlocking the inode's
900 * flush lock. 825 * flush lock.
901 */ 826 */
902/*ARGSUSED*/
903void 827void
904xfs_iflush_done( 828xfs_iflush_done(
905 xfs_buf_t *bp, 829 struct xfs_buf *bp,
906 xfs_inode_log_item_t *iip) 830 struct xfs_log_item *lip)
907{ 831{
832 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
908 xfs_inode_t *ip = iip->ili_inode; 833 xfs_inode_t *ip = iip->ili_inode;
909 struct xfs_ail *ailp = iip->ili_item.li_ailp; 834 struct xfs_ail *ailp = lip->li_ailp;
910 835
911 /* 836 /*
912 * We only want to pull the item from the AIL if it is 837 * We only want to pull the item from the AIL if it is
@@ -917,12 +842,11 @@ xfs_iflush_done(
917 * the lock since it's cheaper, and then we recheck while 842 * the lock since it's cheaper, and then we recheck while
918 * holding the lock before removing the inode from the AIL. 843 * holding the lock before removing the inode from the AIL.
919 */ 844 */
920 if (iip->ili_logged && 845 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
921 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
922 spin_lock(&ailp->xa_lock); 846 spin_lock(&ailp->xa_lock);
923 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 847 if (lip->li_lsn == iip->ili_flush_lsn) {
924 /* xfs_trans_ail_delete() drops the AIL lock. */ 848 /* xfs_trans_ail_delete() drops the AIL lock. */
925 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); 849 xfs_trans_ail_delete(ailp, lip);
926 } else { 850 } else {
927 spin_unlock(&ailp->xa_lock); 851 spin_unlock(&ailp->xa_lock);
928 } 852 }
@@ -940,8 +864,6 @@ xfs_iflush_done(
940 * Release the inode's flush lock since we're done with it. 864 * Release the inode's flush lock since we're done with it.
941 */ 865 */
942 xfs_ifunlock(ip); 866 xfs_ifunlock(ip);
943
944 return;
945} 867}
946 868
947/* 869/*
@@ -957,10 +879,8 @@ xfs_iflush_abort(
957 xfs_inode_t *ip) 879 xfs_inode_t *ip)
958{ 880{
959 xfs_inode_log_item_t *iip = ip->i_itemp; 881 xfs_inode_log_item_t *iip = ip->i_itemp;
960 xfs_mount_t *mp;
961 882
962 iip = ip->i_itemp; 883 iip = ip->i_itemp;
963 mp = ip->i_mount;
964 if (iip) { 884 if (iip) {
965 struct xfs_ail *ailp = iip->ili_item.li_ailp; 885 struct xfs_ail *ailp = iip->ili_item.li_ailp;
966 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 886 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
@@ -991,10 +911,10 @@ xfs_iflush_abort(
991 911
992void 912void
993xfs_istale_done( 913xfs_istale_done(
994 xfs_buf_t *bp, 914 struct xfs_buf *bp,
995 xfs_inode_log_item_t *iip) 915 struct xfs_log_item *lip)
996{ 916{
997 xfs_iflush_abort(iip->ili_inode); 917 xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
998} 918}
999 919
1000/* 920/*
@@ -1007,9 +927,8 @@ xfs_inode_item_format_convert(
1007 xfs_inode_log_format_t *in_f) 927 xfs_inode_log_format_t *in_f)
1008{ 928{
1009 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { 929 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
1010 xfs_inode_log_format_32_t *in_f32; 930 xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
1011 931
1012 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
1013 in_f->ilf_type = in_f32->ilf_type; 932 in_f->ilf_type = in_f32->ilf_type;
1014 in_f->ilf_size = in_f32->ilf_size; 933 in_f->ilf_size = in_f32->ilf_size;
1015 in_f->ilf_fields = in_f32->ilf_fields; 934 in_f->ilf_fields = in_f32->ilf_fields;
@@ -1025,9 +944,8 @@ xfs_inode_item_format_convert(
1025 in_f->ilf_boffset = in_f32->ilf_boffset; 944 in_f->ilf_boffset = in_f32->ilf_boffset;
1026 return 0; 945 return 0;
1027 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ 946 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
1028 xfs_inode_log_format_64_t *in_f64; 947 xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
1029 948
1030 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
1031 in_f->ilf_type = in_f64->ilf_type; 949 in_f->ilf_type = in_f64->ilf_type;
1032 in_f->ilf_size = in_f64->ilf_size; 950 in_f->ilf_size = in_f64->ilf_size;
1033 in_f->ilf_fields = in_f64->ilf_fields; 951 in_f->ilf_fields = in_f64->ilf_fields;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 9a467958ecdd..d3dee61e6d91 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -103,12 +103,6 @@ typedef struct xfs_inode_log_format_64 {
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 104 XFS_ILOG_ABROOT)
105 105
106#define XFS_ILI_HOLD 0x1
107#define XFS_ILI_IOLOCKED_EXCL 0x2
108#define XFS_ILI_IOLOCKED_SHARED 0x4
109
110#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
111
112static inline int xfs_ilog_fbroot(int w) 106static inline int xfs_ilog_fbroot(int w)
113{ 107{
114 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); 108 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
@@ -137,7 +131,7 @@ typedef struct xfs_inode_log_item {
137 struct xfs_inode *ili_inode; /* inode ptr */ 131 struct xfs_inode *ili_inode; /* inode ptr */
138 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 132 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
139 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ 133 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
140 unsigned short ili_flags; /* misc flags */ 134 unsigned short ili_lock_flags; /* lock flags */
141 unsigned short ili_logged; /* flushed logged data */ 135 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 136 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
@@ -161,8 +155,8 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
161 155
162extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
163extern void xfs_inode_item_destroy(struct xfs_inode *); 157extern void xfs_inode_item_destroy(struct xfs_inode *);
164extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 158extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
165extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *); 159extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
166extern void xfs_iflush_abort(struct xfs_inode *); 160extern void xfs_iflush_abort(struct xfs_inode *);
167extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, 161extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
168 xfs_inode_log_format_t *); 162 xfs_inode_log_format_t *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ef14943829da..20576146369f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,19 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h" 26#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h" 27#include "xfs_quota.h"
30#include "xfs_mount.h" 28#include "xfs_mount.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_ialloc.h"
39#include "xfs_btree.h" 34#include "xfs_btree.h"
40#include "xfs_bmap.h" 35#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
@@ -123,7 +118,7 @@ xfs_iomap(
123 error = xfs_bmapi(NULL, ip, offset_fsb, 118 error = xfs_bmapi(NULL, ip, offset_fsb,
124 (xfs_filblks_t)(end_fsb - offset_fsb), 119 (xfs_filblks_t)(end_fsb - offset_fsb),
125 bmapi_flags, NULL, 0, imap, 120 bmapi_flags, NULL, 0, imap,
126 nimaps, NULL, NULL); 121 nimaps, NULL);
127 122
128 if (error) 123 if (error)
129 goto out; 124 goto out;
@@ -138,7 +133,7 @@ xfs_iomap(
138 break; 133 break;
139 } 134 }
140 135
141 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { 136 if (flags & BMAPI_DIRECT) {
142 error = xfs_iomap_write_direct(ip, offset, count, flags, 137 error = xfs_iomap_write_direct(ip, offset, count, flags,
143 imap, nimaps); 138 imap, nimaps);
144 } else { 139 } else {
@@ -247,7 +242,7 @@ xfs_iomap_write_direct(
247 xfs_off_t offset, 242 xfs_off_t offset,
248 size_t count, 243 size_t count,
249 int flags, 244 int flags,
250 xfs_bmbt_irec_t *ret_imap, 245 xfs_bmbt_irec_t *imap,
251 int *nmaps) 246 int *nmaps)
252{ 247{
253 xfs_mount_t *mp = ip->i_mount; 248 xfs_mount_t *mp = ip->i_mount;
@@ -261,7 +256,6 @@ xfs_iomap_write_direct(
261 int quota_flag; 256 int quota_flag;
262 int rt; 257 int rt;
263 xfs_trans_t *tp; 258 xfs_trans_t *tp;
264 xfs_bmbt_irec_t imap;
265 xfs_bmap_free_t free_list; 259 xfs_bmap_free_t free_list;
266 uint qblocks, resblks, resrtextents; 260 uint qblocks, resblks, resrtextents;
267 int committed; 261 int committed;
@@ -285,10 +279,10 @@ xfs_iomap_write_direct(
285 if (error) 279 if (error)
286 goto error_out; 280 goto error_out;
287 } else { 281 } else {
288 if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK)) 282 if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
289 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 283 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
290 ret_imap->br_blockcount + 284 imap->br_blockcount +
291 ret_imap->br_startoff); 285 imap->br_startoff);
292 } 286 }
293 count_fsb = last_fsb - offset_fsb; 287 count_fsb = last_fsb - offset_fsb;
294 ASSERT(count_fsb > 0); 288 ASSERT(count_fsb > 0);
@@ -334,20 +328,22 @@ xfs_iomap_write_direct(
334 if (error) 328 if (error)
335 goto error1; 329 goto error1;
336 330
337 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 331 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 332
340 bmapi_flag = XFS_BMAPI_WRITE; 333 bmapi_flag = XFS_BMAPI_WRITE;
341 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) 334 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
342 bmapi_flag |= XFS_BMAPI_PREALLOC; 335 bmapi_flag |= XFS_BMAPI_PREALLOC;
343 336
344 /* 337 /*
345 * Issue the xfs_bmapi() call to allocate the blocks 338 * Issue the xfs_bmapi() call to allocate the blocks.
339 *
340 * From this point onwards we overwrite the imap pointer that the
341 * caller gave to us.
346 */ 342 */
347 xfs_bmap_init(&free_list, &firstfsb); 343 xfs_bmap_init(&free_list, &firstfsb);
348 nimaps = 1; 344 nimaps = 1;
349 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, 345 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
350 &firstfsb, 0, &imap, &nimaps, &free_list, NULL); 346 &firstfsb, 0, imap, &nimaps, &free_list);
351 if (error) 347 if (error)
352 goto error0; 348 goto error0;
353 349
@@ -369,12 +365,11 @@ xfs_iomap_write_direct(
369 goto error_out; 365 goto error_out;
370 } 366 }
371 367
372 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { 368 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
373 error = xfs_cmn_err_fsblock_zero(ip, &imap); 369 error = xfs_cmn_err_fsblock_zero(ip, imap);
374 goto error_out; 370 goto error_out;
375 } 371 }
376 372
377 *ret_imap = imap;
378 *nmaps = 1; 373 *nmaps = 1;
379 return 0; 374 return 0;
380 375
@@ -425,7 +420,7 @@ xfs_iomap_eof_want_preallocate(
425 imaps = nimaps; 420 imaps = nimaps;
426 firstblock = NULLFSBLOCK; 421 firstblock = NULLFSBLOCK;
427 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, 422 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
428 &firstblock, 0, imap, &imaps, NULL, NULL); 423 &firstblock, 0, imap, &imaps, NULL);
429 if (error) 424 if (error)
430 return error; 425 return error;
431 for (n = 0; n < imaps; n++) { 426 for (n = 0; n < imaps; n++) {
@@ -500,7 +495,7 @@ retry:
500 (xfs_filblks_t)(last_fsb - offset_fsb), 495 (xfs_filblks_t)(last_fsb - offset_fsb),
501 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 496 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
502 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 497 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
503 &nimaps, NULL, NULL); 498 &nimaps, NULL);
504 if (error && (error != ENOSPC)) 499 if (error && (error != ENOSPC))
505 return XFS_ERROR(error); 500 return XFS_ERROR(error);
506 501
@@ -548,7 +543,7 @@ xfs_iomap_write_allocate(
548 xfs_inode_t *ip, 543 xfs_inode_t *ip,
549 xfs_off_t offset, 544 xfs_off_t offset,
550 size_t count, 545 size_t count,
551 xfs_bmbt_irec_t *map, 546 xfs_bmbt_irec_t *imap,
552 int *retmap) 547 int *retmap)
553{ 548{
554 xfs_mount_t *mp = ip->i_mount; 549 xfs_mount_t *mp = ip->i_mount;
@@ -557,7 +552,6 @@ xfs_iomap_write_allocate(
557 xfs_fsblock_t first_block; 552 xfs_fsblock_t first_block;
558 xfs_bmap_free_t free_list; 553 xfs_bmap_free_t free_list;
559 xfs_filblks_t count_fsb; 554 xfs_filblks_t count_fsb;
560 xfs_bmbt_irec_t imap;
561 xfs_trans_t *tp; 555 xfs_trans_t *tp;
562 int nimaps, committed; 556 int nimaps, committed;
563 int error = 0; 557 int error = 0;
@@ -573,8 +567,8 @@ xfs_iomap_write_allocate(
573 return XFS_ERROR(error); 567 return XFS_ERROR(error);
574 568
575 offset_fsb = XFS_B_TO_FSBT(mp, offset); 569 offset_fsb = XFS_B_TO_FSBT(mp, offset);
576 count_fsb = map->br_blockcount; 570 count_fsb = imap->br_blockcount;
577 map_start_fsb = map->br_startoff; 571 map_start_fsb = imap->br_startoff;
578 572
579 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 573 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
580 574
@@ -602,8 +596,7 @@ xfs_iomap_write_allocate(
602 return XFS_ERROR(error); 596 return XFS_ERROR(error);
603 } 597 }
604 xfs_ilock(ip, XFS_ILOCK_EXCL); 598 xfs_ilock(ip, XFS_ILOCK_EXCL);
605 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 599 xfs_trans_ijoin(tp, ip);
606 xfs_trans_ihold(tp, ip);
607 600
608 xfs_bmap_init(&free_list, &first_block); 601 xfs_bmap_init(&free_list, &first_block);
609 602
@@ -654,10 +647,15 @@ xfs_iomap_write_allocate(
654 } 647 }
655 } 648 }
656 649
657 /* Go get the actual blocks */ 650 /*
651 * Go get the actual blocks.
652 *
653 * From this point onwards we overwrite the imap
654 * pointer that the caller gave to us.
655 */
658 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, 656 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
659 XFS_BMAPI_WRITE, &first_block, 1, 657 XFS_BMAPI_WRITE, &first_block, 1,
660 &imap, &nimaps, &free_list, NULL); 658 imap, &nimaps, &free_list);
661 if (error) 659 if (error)
662 goto trans_cancel; 660 goto trans_cancel;
663 661
@@ -676,13 +674,12 @@ xfs_iomap_write_allocate(
676 * See if we were able to allocate an extent that 674 * See if we were able to allocate an extent that
677 * covers at least part of the callers request 675 * covers at least part of the callers request
678 */ 676 */
679 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 677 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
680 return xfs_cmn_err_fsblock_zero(ip, &imap); 678 return xfs_cmn_err_fsblock_zero(ip, imap);
681 679
682 if ((offset_fsb >= imap.br_startoff) && 680 if ((offset_fsb >= imap->br_startoff) &&
683 (offset_fsb < (imap.br_startoff + 681 (offset_fsb < (imap->br_startoff +
684 imap.br_blockcount))) { 682 imap->br_blockcount))) {
685 *map = imap;
686 *retmap = 1; 683 *retmap = 1;
687 XFS_STATS_INC(xs_xstrat_quick); 684 XFS_STATS_INC(xs_xstrat_quick);
688 return 0; 685 return 0;
@@ -692,8 +689,8 @@ xfs_iomap_write_allocate(
692 * So far we have not mapped the requested part of the 689 * So far we have not mapped the requested part of the
693 * file, just surrounding data, try again. 690 * file, just surrounding data, try again.
694 */ 691 */
695 count_fsb -= imap.br_blockcount; 692 count_fsb -= imap->br_blockcount;
696 map_start_fsb = imap.br_startoff + imap.br_blockcount; 693 map_start_fsb = imap->br_startoff + imap->br_blockcount;
697 } 694 }
698 695
699trans_cancel: 696trans_cancel:
@@ -766,8 +763,7 @@ xfs_iomap_write_unwritten(
766 } 763 }
767 764
768 xfs_ilock(ip, XFS_ILOCK_EXCL); 765 xfs_ilock(ip, XFS_ILOCK_EXCL);
769 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 766 xfs_trans_ijoin(tp, ip);
770 xfs_trans_ihold(tp, ip);
771 767
772 /* 768 /*
773 * Modify the unwritten extent state of the buffer. 769 * Modify the unwritten extent state of the buffer.
@@ -776,7 +772,7 @@ xfs_iomap_write_unwritten(
776 nimaps = 1; 772 nimaps = 1;
777 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 773 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
778 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 774 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
779 1, &imap, &nimaps, &free_list, NULL); 775 1, &imap, &nimaps, &free_list);
780 if (error) 776 if (error)
781 goto error_on_bmapi_transaction; 777 goto error_on_bmapi_transaction;
782 778
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 81ac4afd45b3..7748a430f50d 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,17 +18,16 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21typedef enum { 21/* base extent manipulation calls */
22 /* base extent manipulation calls */ 22#define BMAPI_READ (1 << 0) /* read extents */
23 BMAPI_READ = (1 << 0), /* read extents */ 23#define BMAPI_WRITE (1 << 1) /* create extents */
24 BMAPI_WRITE = (1 << 1), /* create extents */ 24#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
25 BMAPI_ALLOCATE = (1 << 2), /* delayed allocate to real extents */ 25
26 /* modifiers */ 26/* modifiers */
27 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ 27#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
28 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ 28#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
29 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ 29#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
30 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 30#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
31} bmapi_flags_t;
32 31
33#define BMAPI_FLAGS \ 32#define BMAPI_FLAGS \
34 { BMAPI_READ, "READ" }, \ 33 { BMAPI_READ, "READ" }, \
@@ -36,7 +35,6 @@ typedef enum {
36 { BMAPI_ALLOCATE, "ALLOCATE" }, \ 35 { BMAPI_ALLOCATE, "ALLOCATE" }, \
37 { BMAPI_IGNSTATE, "IGNSTATE" }, \ 36 { BMAPI_IGNSTATE, "IGNSTATE" }, \
38 { BMAPI_DIRECT, "DIRECT" }, \ 37 { BMAPI_DIRECT, "DIRECT" }, \
39 { BMAPI_MMAP, "MMAP" }, \
40 { BMAPI_TRYLOCK, "TRYLOCK" } 38 { BMAPI_TRYLOCK, "TRYLOCK" }
41 39
42struct xfs_inode; 40struct xfs_inode;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2b86f8610512..dc1882adaf54 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -24,20 +24,17 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
38#include "xfs_itable.h" 34#include "xfs_itable.h"
39#include "xfs_error.h" 35#include "xfs_error.h"
40#include "xfs_btree.h" 36#include "xfs_btree.h"
37#include "xfs_trace.h"
41 38
42STATIC int 39STATIC int
43xfs_internal_inum( 40xfs_internal_inum(
@@ -95,7 +92,8 @@ xfs_bulkstat_one_int(
95 * further change. 92 * further change.
96 */ 93 */
97 buf->bs_nlink = dic->di_nlink; 94 buf->bs_nlink = dic->di_nlink;
98 buf->bs_projid = dic->di_projid; 95 buf->bs_projid_lo = dic->di_projid_lo;
96 buf->bs_projid_hi = dic->di_projid_hi;
99 buf->bs_ino = ino; 97 buf->bs_ino = ino;
100 buf->bs_mode = dic->di_mode; 98 buf->bs_mode = dic->di_mode;
101 buf->bs_uid = dic->di_uid; 99 buf->bs_uid = dic->di_uid;
@@ -143,7 +141,8 @@ xfs_bulkstat_one_int(
143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 141 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
144 break; 142 break;
145 } 143 }
146 xfs_iput(ip, XFS_ILOCK_SHARED); 144 xfs_iunlock(ip, XFS_ILOCK_SHARED);
145 IRELE(ip);
147 146
148 error = formatter(buffer, ubsize, ubused, buf); 147 error = formatter(buffer, ubsize, ubused, buf);
149 148
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5215abc8023a..cee4ab9f8a9e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,8 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_log_priv.h" 29#include "xfs_log_priv.h"
@@ -35,8 +33,6 @@
35#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
36#include "xfs_log_recover.h" 34#include "xfs_log_recover.h"
37#include "xfs_trans_priv.h" 35#include "xfs_trans_priv.h"
38#include "xfs_dir2_sf.h"
39#include "xfs_attr_sf.h"
40#include "xfs_dinode.h" 36#include "xfs_dinode.h"
41#include "xfs_inode.h" 37#include "xfs_inode.h"
42#include "xfs_rw.h" 38#include "xfs_rw.h"
@@ -337,7 +333,6 @@ xfs_log_reserve(
337 int retval = 0; 333 int retval = 0;
338 334
339 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 335 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
340 ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
341 336
342 if (XLOG_FORCED_SHUTDOWN(log)) 337 if (XLOG_FORCED_SHUTDOWN(log))
343 return XFS_ERROR(EIO); 338 return XFS_ERROR(EIO);
@@ -552,7 +547,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
552 .magic = XLOG_UNMOUNT_TYPE, 547 .magic = XLOG_UNMOUNT_TYPE,
553 }; 548 };
554 struct xfs_log_iovec reg = { 549 struct xfs_log_iovec reg = {
555 .i_addr = (void *)&magic, 550 .i_addr = &magic,
556 .i_len = sizeof(magic), 551 .i_len = sizeof(magic),
557 .i_type = XLOG_REG_TYPE_UNMOUNT, 552 .i_type = XLOG_REG_TYPE_UNMOUNT,
558 }; 553 };
@@ -922,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp)
922 l = iclog->ic_log; 917 l = iclog->ic_log;
923 918
924 /* 919 /*
925 * If the _XFS_BARRIER_FAILED flag was set by a lower
926 * layer, it means the underlying device no longer supports
927 * barrier I/O. Warn loudly and turn off barriers.
928 */
929 if (bp->b_flags & _XFS_BARRIER_FAILED) {
930 bp->b_flags &= ~_XFS_BARRIER_FAILED;
931 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
932 xfs_fs_cmn_err(CE_WARN, l->l_mp,
933 "xlog_iodone: Barriers are no longer supported"
934 " by device. Disabling barriers\n");
935 }
936
937 /*
938 * Race to shutdown the filesystem if we see an error. 920 * Race to shutdown the filesystem if we see an error.
939 */ 921 */
940 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, 922 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
@@ -1047,7 +1029,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1047 xlog_in_core_t *iclog, *prev_iclog=NULL; 1029 xlog_in_core_t *iclog, *prev_iclog=NULL;
1048 xfs_buf_t *bp; 1030 xfs_buf_t *bp;
1049 int i; 1031 int i;
1050 int iclogsize;
1051 int error = ENOMEM; 1032 int error = ENOMEM;
1052 uint log2_size = 0; 1033 uint log2_size = 0;
1053 1034
@@ -1127,7 +1108,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1127 * with different amounts of memory. See the definition of 1108 * with different amounts of memory. See the definition of
1128 * xlog_in_core_t in xfs_log_priv.h for details. 1109 * xlog_in_core_t in xfs_log_priv.h for details.
1129 */ 1110 */
1130 iclogsize = log->l_iclog_size;
1131 ASSERT(log->l_iclog_size >= 4096); 1111 ASSERT(log->l_iclog_size >= 4096);
1132 for (i=0; i < log->l_iclog_bufs; i++) { 1112 for (i=0; i < log->l_iclog_bufs; i++) {
1133 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1113 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
@@ -1138,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1138 iclog->ic_prev = prev_iclog; 1118 iclog->ic_prev = prev_iclog;
1139 prev_iclog = iclog; 1119 prev_iclog = iclog;
1140 1120
1141 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); 1121 bp = xfs_buf_get_uncached(mp->m_logdev_targp,
1122 log->l_iclog_size, 0);
1142 if (!bp) 1123 if (!bp)
1143 goto out_free_iclog; 1124 goto out_free_iclog;
1144 if (!XFS_BUF_CPSEMA(bp)) 1125 if (!XFS_BUF_CPSEMA(bp))
@@ -1316,7 +1297,7 @@ xlog_bdstrat(
1316 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1297 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1317 XFS_BUF_ERROR(bp, EIO); 1298 XFS_BUF_ERROR(bp, EIO);
1318 XFS_BUF_STALE(bp); 1299 XFS_BUF_STALE(bp);
1319 xfs_biodone(bp); 1300 xfs_buf_ioend(bp, 0);
1320 /* 1301 /*
1321 * It would seem logical to return EIO here, but we rely on 1302 * It would seem logical to return EIO here, but we rely on
1322 * the log state machine to propagate I/O errors instead of 1303 * the log state machine to propagate I/O errors instead of
@@ -1428,11 +1409,8 @@ xlog_sync(xlog_t *log,
1428 XFS_BUF_BUSY(bp); 1409 XFS_BUF_BUSY(bp);
1429 XFS_BUF_ASYNC(bp); 1410 XFS_BUF_ASYNC(bp);
1430 bp->b_flags |= XBF_LOG_BUFFER; 1411 bp->b_flags |= XBF_LOG_BUFFER;
1431 /* 1412
1432 * Do an ordered write for the log block. 1413 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1433 * Its unnecessary to flush the first split block in the log wrap case.
1434 */
1435 if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
1436 XFS_BUF_ORDERED(bp); 1414 XFS_BUF_ORDERED(bp);
1437 1415
1438 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1416 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -3025,7 +3003,8 @@ _xfs_log_force(
3025 3003
3026 XFS_STATS_INC(xs_log_force); 3004 XFS_STATS_INC(xs_log_force);
3027 3005
3028 xlog_cil_push(log, 1); 3006 if (log->l_cilp)
3007 xlog_cil_force(log);
3029 3008
3030 spin_lock(&log->l_icloglock); 3009 spin_lock(&log->l_icloglock);
3031 3010
@@ -3177,7 +3156,7 @@ _xfs_log_force_lsn(
3177 XFS_STATS_INC(xs_log_force); 3156 XFS_STATS_INC(xs_log_force);
3178 3157
3179 if (log->l_cilp) { 3158 if (log->l_cilp) {
3180 lsn = xlog_cil_push_lsn(log, lsn); 3159 lsn = xlog_cil_force_lsn(log, lsn);
3181 if (lsn == NULLCOMMITLSN) 3160 if (lsn == NULLCOMMITLSN)
3182 return 0; 3161 return 0;
3183 } 3162 }
@@ -3734,7 +3713,7 @@ xfs_log_force_umount(
3734 * call below. 3713 * call below.
3735 */ 3714 */
3736 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) 3715 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
3737 xlog_cil_push(log, 1); 3716 xlog_cil_force(log);
3738 3717
3739 /* 3718 /*
3740 * We must hold both the GRANT lock and the LOG lock, 3719 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 04c78e642cc8..916eb7db14d9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -55,14 +55,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
55/* 55/*
56 * Flags to xfs_log_reserve() 56 * Flags to xfs_log_reserve()
57 * 57 *
58 * XFS_LOG_SLEEP: If space is not available, sleep (default)
59 * XFS_LOG_NOSLEEP: If space is not available, return error
60 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are 58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
61 * performed against this type of reservation, the reservation 59 * performed against this type of reservation, the reservation
62 * is not decreased. Long running transactions should use this. 60 * is not decreased. Long running transactions should use this.
63 */ 61 */
64#define XFS_LOG_SLEEP 0x0
65#define XFS_LOG_NOSLEEP 0x1
66#define XFS_LOG_PERM_RESERV 0x2 62#define XFS_LOG_PERM_RESERV 0x2
67 63
68/* 64/*
@@ -104,7 +100,7 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
104#define XLOG_REG_TYPE_MAX 19 100#define XLOG_REG_TYPE_MAX 19
105 101
106typedef struct xfs_log_iovec { 102typedef struct xfs_log_iovec {
107 xfs_caddr_t i_addr; /* beginning address of region */ 103 void *i_addr; /* beginning address of region */
108 int i_len; /* length in bytes of region */ 104 int i_len; /* length in bytes of region */
109 uint i_type; /* type of region */ 105 uint i_type; /* type of region */
110} xfs_log_iovec_t; 106} xfs_log_iovec_t;
@@ -201,9 +197,4 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
201bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
202 198
203#endif 199#endif
204
205
206extern int xlog_debug; /* set to 1 to enable real log */
207
208
209#endif /* __XFS_LOG_H__ */ 200#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index bb17cc044bf3..23d6ceb5e97b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -26,8 +26,6 @@
26#include "xfs_log_priv.h" 26#include "xfs_log_priv.h"
27#include "xfs_sb.h" 27#include "xfs_sb.h"
28#include "xfs_ag.h" 28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_dmapi.h"
31#include "xfs_mount.h" 29#include "xfs_mount.h"
32#include "xfs_error.h" 30#include "xfs_error.h"
33#include "xfs_alloc.h" 31#include "xfs_alloc.h"
@@ -70,6 +68,7 @@ xlog_cil_init(
70 ctx->sequence = 1; 68 ctx->sequence = 1;
71 ctx->cil = cil; 69 ctx->cil = cil;
72 cil->xc_ctx = ctx; 70 cil->xc_ctx = ctx;
71 cil->xc_current_sequence = ctx->sequence;
73 72
74 cil->xc_log = log; 73 cil->xc_log = log;
75 log->l_cilp = cil; 74 log->l_cilp = cil;
@@ -147,102 +146,6 @@ xlog_cil_init_post_recovery(
147} 146}
148 147
149/* 148/*
150 * Insert the log item into the CIL and calculate the difference in space
151 * consumed by the item. Add the space to the checkpoint ticket and calculate
152 * if the change requires additional log metadata. If it does, take that space
153 * as well. Remove the amount of space we addded to the checkpoint ticket from
154 * the current transaction ticket so that the accounting works out correctly.
155 *
156 * If this is the first time the item is being placed into the CIL in this
157 * context, pin it so it can't be written to disk until the CIL is flushed to
158 * the iclog and the iclog written to disk.
159 */
160static void
161xlog_cil_insert(
162 struct log *log,
163 struct xlog_ticket *ticket,
164 struct xfs_log_item *item,
165 struct xfs_log_vec *lv)
166{
167 struct xfs_cil *cil = log->l_cilp;
168 struct xfs_log_vec *old = lv->lv_item->li_lv;
169 struct xfs_cil_ctx *ctx = cil->xc_ctx;
170 int len;
171 int diff_iovecs;
172 int iclog_space;
173
174 if (old) {
175 /* existing lv on log item, space used is a delta */
176 ASSERT(!list_empty(&item->li_cil));
177 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
178
179 len = lv->lv_buf_len - old->lv_buf_len;
180 diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
181 kmem_free(old->lv_buf);
182 kmem_free(old);
183 } else {
184 /* new lv, must pin the log item */
185 ASSERT(!lv->lv_item->li_lv);
186 ASSERT(list_empty(&item->li_cil));
187
188 len = lv->lv_buf_len;
189 diff_iovecs = lv->lv_niovecs;
190 IOP_PIN(lv->lv_item);
191
192 }
193 len += diff_iovecs * sizeof(xlog_op_header_t);
194
195 /* attach new log vector to log item */
196 lv->lv_item->li_lv = lv;
197
198 spin_lock(&cil->xc_cil_lock);
199 list_move_tail(&item->li_cil, &cil->xc_cil);
200 ctx->nvecs += diff_iovecs;
201
202 /*
203 * If this is the first time the item is being committed to the CIL,
204 * store the sequence number on the log item so we can tell
205 * in future commits whether this is the first checkpoint the item is
206 * being committed into.
207 */
208 if (!item->li_seq)
209 item->li_seq = ctx->sequence;
210
211 /*
212 * Now transfer enough transaction reservation to the context ticket
213 * for the checkpoint. The context ticket is special - the unit
214 * reservation has to grow as well as the current reservation as we
215 * steal from tickets so we can correctly determine the space used
216 * during the transaction commit.
217 */
218 if (ctx->ticket->t_curr_res == 0) {
219 /* first commit in checkpoint, steal the header reservation */
220 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
221 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
222 ticket->t_curr_res -= ctx->ticket->t_unit_res;
223 }
224
225 /* do we need space for more log record headers? */
226 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
227 if (len > 0 && (ctx->space_used / iclog_space !=
228 (ctx->space_used + len) / iclog_space)) {
229 int hdrs;
230
231 hdrs = (len + iclog_space - 1) / iclog_space;
232 /* need to take into account split region headers, too */
233 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
234 ctx->ticket->t_unit_res += hdrs;
235 ctx->ticket->t_curr_res += hdrs;
236 ticket->t_curr_res -= hdrs;
237 ASSERT(ticket->t_curr_res >= len);
238 }
239 ticket->t_curr_res -= len;
240 ctx->space_used += len;
241
242 spin_unlock(&cil->xc_cil_lock);
243}
244
245/*
246 * Format log item into a flat buffers 149 * Format log item into a flat buffers
247 * 150 *
248 * For delayed logging, we need to hold a formatted buffer containing all the 151 * For delayed logging, we need to hold a formatted buffer containing all the
@@ -271,15 +174,10 @@ xlog_cil_insert(
271static void 174static void
272xlog_cil_format_items( 175xlog_cil_format_items(
273 struct log *log, 176 struct log *log,
274 struct xfs_log_vec *log_vector, 177 struct xfs_log_vec *log_vector)
275 struct xlog_ticket *ticket,
276 xfs_lsn_t *start_lsn)
277{ 178{
278 struct xfs_log_vec *lv; 179 struct xfs_log_vec *lv;
279 180
280 if (start_lsn)
281 *start_lsn = log->l_cilp->xc_ctx->sequence;
282
283 ASSERT(log_vector); 181 ASSERT(log_vector);
284 for (lv = log_vector; lv; lv = lv->lv_next) { 182 for (lv = log_vector; lv; lv = lv->lv_next) {
285 void *ptr; 183 void *ptr;
@@ -292,7 +190,7 @@ xlog_cil_format_items(
292 len += lv->lv_iovecp[index].i_len; 190 len += lv->lv_iovecp[index].i_len;
293 191
294 lv->lv_buf_len = len; 192 lv->lv_buf_len = len;
295 lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); 193 lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
296 ptr = lv->lv_buf; 194 ptr = lv->lv_buf;
297 195
298 for (index = 0; index < lv->lv_niovecs; index++) { 196 for (index = 0; index < lv->lv_niovecs; index++) {
@@ -303,97 +201,153 @@ xlog_cil_format_items(
303 ptr += vec->i_len; 201 ptr += vec->i_len;
304 } 202 }
305 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); 203 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
306
307 xlog_cil_insert(log, ticket, lv->lv_item, lv);
308 } 204 }
309} 205}
310 206
311static void 207/*
312xlog_cil_free_logvec( 208 * Prepare the log item for insertion into the CIL. Calculate the difference in
313 struct xfs_log_vec *log_vector) 209 * log space and vectors it will consume, and if it is a new item pin it as
210 * well.
211 */
212STATIC void
213xfs_cil_prepare_item(
214 struct log *log,
215 struct xfs_log_vec *lv,
216 int *len,
217 int *diff_iovecs)
314{ 218{
315 struct xfs_log_vec *lv; 219 struct xfs_log_vec *old = lv->lv_item->li_lv;
220
221 if (old) {
222 /* existing lv on log item, space used is a delta */
223 ASSERT(!list_empty(&lv->lv_item->li_cil));
224 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
225
226 *len += lv->lv_buf_len - old->lv_buf_len;
227 *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
228 kmem_free(old->lv_buf);
229 kmem_free(old);
230 } else {
231 /* new lv, must pin the log item */
232 ASSERT(!lv->lv_item->li_lv);
233 ASSERT(list_empty(&lv->lv_item->li_cil));
234
235 *len += lv->lv_buf_len;
236 *diff_iovecs += lv->lv_niovecs;
237 IOP_PIN(lv->lv_item);
316 238
317 for (lv = log_vector; lv; ) {
318 struct xfs_log_vec *next = lv->lv_next;
319 kmem_free(lv->lv_buf);
320 kmem_free(lv);
321 lv = next;
322 } 239 }
240
241 /* attach new log vector to log item */
242 lv->lv_item->li_lv = lv;
243
244 /*
245 * If this is the first time the item is being committed to the
246 * CIL, store the sequence number on the log item so we can
247 * tell in future commits whether this is the first checkpoint
248 * the item is being committed into.
249 */
250 if (!lv->lv_item->li_seq)
251 lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
323} 252}
324 253
325/* 254/*
326 * Commit a transaction with the given vector to the Committed Item List. 255 * Insert the log items into the CIL and calculate the difference in space
327 * 256 * consumed by the item. Add the space to the checkpoint ticket and calculate
328 * To do this, we need to format the item, pin it in memory if required and 257 * if the change requires additional log metadata. If it does, take that space
329 * account for the space used by the transaction. Once we have done that we 258 * as well. Remove the amount of space we addded to the checkpoint ticket from
330 * need to release the unused reservation for the transaction, attach the 259 * the current transaction ticket so that the accounting works out correctly.
331 * transaction to the checkpoint context so we carry the busy extents through
332 * to checkpoint completion, and then unlock all the items in the transaction.
333 *
334 * For more specific information about the order of operations in
335 * xfs_log_commit_cil() please refer to the comments in
336 * xfs_trans_commit_iclog().
337 *
338 * Called with the context lock already held in read mode to lock out
339 * background commit, returns without it held once background commits are
340 * allowed again.
341 */ 260 */
342int 261static void
343xfs_log_commit_cil( 262xlog_cil_insert_items(
344 struct xfs_mount *mp, 263 struct log *log,
345 struct xfs_trans *tp,
346 struct xfs_log_vec *log_vector, 264 struct xfs_log_vec *log_vector,
347 xfs_lsn_t *commit_lsn, 265 struct xlog_ticket *ticket)
348 int flags)
349{ 266{
350 struct log *log = mp->m_log; 267 struct xfs_cil *cil = log->l_cilp;
351 int log_flags = 0; 268 struct xfs_cil_ctx *ctx = cil->xc_ctx;
352 int push = 0; 269 struct xfs_log_vec *lv;
353 270 int len = 0;
354 if (flags & XFS_TRANS_RELEASE_LOG_RES) 271 int diff_iovecs = 0;
355 log_flags = XFS_LOG_REL_PERM_RESERV; 272 int iclog_space;
356 273
357 if (XLOG_FORCED_SHUTDOWN(log)) { 274 ASSERT(log_vector);
358 xlog_cil_free_logvec(log_vector);
359 return XFS_ERROR(EIO);
360 }
361 275
362 /* lock out background commit */ 276 /*
363 down_read(&log->l_cilp->xc_ctx_lock); 277 * Do all the accounting aggregation and switching of log vectors
364 xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); 278 * around in a separate loop to the insertion of items into the CIL.
279 * Then we can do a separate loop to update the CIL within a single
280 * lock/unlock pair. This reduces the number of round trips on the CIL
281 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
282 * hold time for the transaction commit.
283 *
284 * If this is the first time the item is being placed into the CIL in
285 * this context, pin it so it can't be written to disk until the CIL is
286 * flushed to the iclog and the iclog written to disk.
287 *
288 * We can do this safely because the context can't checkpoint until we
289 * are done so it doesn't matter exactly how we update the CIL.
290 */
291 for (lv = log_vector; lv; lv = lv->lv_next)
292 xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
365 293
366 /* check we didn't blow the reservation */ 294 /* account for space used by new iovec headers */
367 if (tp->t_ticket->t_curr_res < 0) 295 len += diff_iovecs * sizeof(xlog_op_header_t);
368 xlog_print_tic_res(log->l_mp, tp->t_ticket);
369 296
370 /* attach the transaction to the CIL if it has any busy extents */ 297 spin_lock(&cil->xc_cil_lock);
371 if (!list_empty(&tp->t_busy)) {
372 spin_lock(&log->l_cilp->xc_cil_lock);
373 list_splice_init(&tp->t_busy,
374 &log->l_cilp->xc_ctx->busy_extents);
375 spin_unlock(&log->l_cilp->xc_cil_lock);
376 }
377 298
378 tp->t_commit_lsn = *commit_lsn; 299 /* move the items to the tail of the CIL */
379 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 300 for (lv = log_vector; lv; lv = lv->lv_next)
380 xfs_trans_unreserve_and_mod_sb(tp); 301 list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
381 302
382 /* check for background commit before unlock */ 303 ctx->nvecs += diff_iovecs;
383 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
384 push = 1;
385 up_read(&log->l_cilp->xc_ctx_lock);
386 304
387 /* 305 /*
388 * We need to push CIL every so often so we don't cache more than we 306 * Now transfer enough transaction reservation to the context ticket
389 * can fit in the log. The limit really is that a checkpoint can't be 307 * for the checkpoint. The context ticket is special - the unit
390 * more than half the log (the current checkpoint is not allowed to 308 * reservation has to grow as well as the current reservation as we
391 * overwrite the previous checkpoint), but commit latency and memory 309 * steal from tickets so we can correctly determine the space used
392 * usage limit this to a smaller size in most cases. 310 * during the transaction commit.
393 */ 311 */
394 if (push) 312 if (ctx->ticket->t_curr_res == 0) {
395 xlog_cil_push(log, 0); 313 /* first commit in checkpoint, steal the header reservation */
396 return 0; 314 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
315 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
316 ticket->t_curr_res -= ctx->ticket->t_unit_res;
317 }
318
319 /* do we need space for more log record headers? */
320 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
321 if (len > 0 && (ctx->space_used / iclog_space !=
322 (ctx->space_used + len) / iclog_space)) {
323 int hdrs;
324
325 hdrs = (len + iclog_space - 1) / iclog_space;
326 /* need to take into account split region headers, too */
327 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
328 ctx->ticket->t_unit_res += hdrs;
329 ctx->ticket->t_curr_res += hdrs;
330 ticket->t_curr_res -= hdrs;
331 ASSERT(ticket->t_curr_res >= len);
332 }
333 ticket->t_curr_res -= len;
334 ctx->space_used += len;
335
336 spin_unlock(&cil->xc_cil_lock);
337}
338
339static void
340xlog_cil_free_logvec(
341 struct xfs_log_vec *log_vector)
342{
343 struct xfs_log_vec *lv;
344
345 for (lv = log_vector; lv; ) {
346 struct xfs_log_vec *next = lv->lv_next;
347 kmem_free(lv->lv_buf);
348 kmem_free(lv);
349 lv = next;
350 }
397} 351}
398 352
399/* 353/*
@@ -429,13 +383,23 @@ xlog_cil_committed(
429} 383}
430 384
431/* 385/*
432 * Push the Committed Item List to the log. If the push_now flag is not set, 386 * Push the Committed Item List to the log. If @push_seq flag is zero, then it
433 * then it is a background flush and so we can chose to ignore it. 387 * is a background flush and so we can chose to ignore it. Otherwise, if the
388 * current sequence is the same as @push_seq we need to do a flush. If
389 * @push_seq is less than the current sequence, then it has already been
390 * flushed and we don't need to do anything - the caller will wait for it to
391 * complete if necessary.
392 *
393 * @push_seq is a value rather than a flag because that allows us to do an
394 * unlocked check of the sequence number for a match. Hence we can allows log
395 * forces to run racily and not issue pushes for the same sequence twice. If we
396 * get a race between multiple pushes for the same sequence they will block on
397 * the first one and then abort, hence avoiding needless pushes.
434 */ 398 */
435int 399STATIC int
436xlog_cil_push( 400xlog_cil_push(
437 struct log *log, 401 struct log *log,
438 int push_now) 402 xfs_lsn_t push_seq)
439{ 403{
440 struct xfs_cil *cil = log->l_cilp; 404 struct xfs_cil *cil = log->l_cilp;
441 struct xfs_log_vec *lv; 405 struct xfs_log_vec *lv;
@@ -455,12 +419,20 @@ xlog_cil_push(
455 if (!cil) 419 if (!cil)
456 return 0; 420 return 0;
457 421
422 ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
423
458 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 424 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
459 new_ctx->ticket = xlog_cil_ticket_alloc(log); 425 new_ctx->ticket = xlog_cil_ticket_alloc(log);
460 426
461 /* lock out transaction commit, but don't block on background push */ 427 /*
428 * Lock out transaction commit, but don't block for background pushes
429 * unless we are well over the CIL space limit. See the definition of
430 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
431 * used here.
432 */
462 if (!down_write_trylock(&cil->xc_ctx_lock)) { 433 if (!down_write_trylock(&cil->xc_ctx_lock)) {
463 if (!push_now) 434 if (!push_seq &&
435 cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
464 goto out_free_ticket; 436 goto out_free_ticket;
465 down_write(&cil->xc_ctx_lock); 437 down_write(&cil->xc_ctx_lock);
466 } 438 }
@@ -471,7 +443,11 @@ xlog_cil_push(
471 goto out_skip; 443 goto out_skip;
472 444
473 /* check for spurious background flush */ 445 /* check for spurious background flush */
474 if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) 446 if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
447 goto out_skip;
448
449 /* check for a previously pushed seqeunce */
450 if (push_seq && push_seq < cil->xc_ctx->sequence)
475 goto out_skip; 451 goto out_skip;
476 452
477 /* 453 /*
@@ -517,6 +493,13 @@ xlog_cil_push(
517 cil->xc_ctx = new_ctx; 493 cil->xc_ctx = new_ctx;
518 494
519 /* 495 /*
496 * mirror the new sequence into the cil structure so that we can do
497 * unlocked checks against the current sequence in log forces without
498 * risking deferencing a freed context pointer.
499 */
500 cil->xc_current_sequence = new_ctx->sequence;
501
502 /*
520 * The switch is now done, so we can drop the context lock and move out 503 * The switch is now done, so we can drop the context lock and move out
521 * of a shared context. We can't just go straight to the commit record, 504 * of a shared context. We can't just go straight to the commit record,
522 * though - we need to synchronise with previous and future commits so 505 * though - we need to synchronise with previous and future commits so
@@ -554,7 +537,7 @@ xlog_cil_push(
554 thdr.th_type = XFS_TRANS_CHECKPOINT; 537 thdr.th_type = XFS_TRANS_CHECKPOINT;
555 thdr.th_tid = tic->t_tid; 538 thdr.th_tid = tic->t_tid;
556 thdr.th_num_items = num_iovecs; 539 thdr.th_num_items = num_iovecs;
557 lhdr.i_addr = (xfs_caddr_t)&thdr; 540 lhdr.i_addr = &thdr;
558 lhdr.i_len = sizeof(xfs_trans_header_t); 541 lhdr.i_len = sizeof(xfs_trans_header_t);
559 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; 542 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
560 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); 543 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
@@ -628,6 +611,105 @@ out_abort:
628} 611}
629 612
630/* 613/*
614 * Commit a transaction with the given vector to the Committed Item List.
615 *
616 * To do this, we need to format the item, pin it in memory if required and
617 * account for the space used by the transaction. Once we have done that we
618 * need to release the unused reservation for the transaction, attach the
619 * transaction to the checkpoint context so we carry the busy extents through
620 * to checkpoint completion, and then unlock all the items in the transaction.
621 *
622 * For more specific information about the order of operations in
623 * xfs_log_commit_cil() please refer to the comments in
624 * xfs_trans_commit_iclog().
625 *
626 * Called with the context lock already held in read mode to lock out
627 * background commit, returns without it held once background commits are
628 * allowed again.
629 */
630int
631xfs_log_commit_cil(
632 struct xfs_mount *mp,
633 struct xfs_trans *tp,
634 struct xfs_log_vec *log_vector,
635 xfs_lsn_t *commit_lsn,
636 int flags)
637{
638 struct log *log = mp->m_log;
639 int log_flags = 0;
640 int push = 0;
641
642 if (flags & XFS_TRANS_RELEASE_LOG_RES)
643 log_flags = XFS_LOG_REL_PERM_RESERV;
644
645 if (XLOG_FORCED_SHUTDOWN(log)) {
646 xlog_cil_free_logvec(log_vector);
647 return XFS_ERROR(EIO);
648 }
649
650 /*
651 * do all the hard work of formatting items (including memory
652 * allocation) outside the CIL context lock. This prevents stalling CIL
653 * pushes when we are low on memory and a transaction commit spends a
654 * lot of time in memory reclaim.
655 */
656 xlog_cil_format_items(log, log_vector);
657
658 /* lock out background commit */
659 down_read(&log->l_cilp->xc_ctx_lock);
660 if (commit_lsn)
661 *commit_lsn = log->l_cilp->xc_ctx->sequence;
662
663 xlog_cil_insert_items(log, log_vector, tp->t_ticket);
664
665 /* check we didn't blow the reservation */
666 if (tp->t_ticket->t_curr_res < 0)
667 xlog_print_tic_res(log->l_mp, tp->t_ticket);
668
669 /* attach the transaction to the CIL if it has any busy extents */
670 if (!list_empty(&tp->t_busy)) {
671 spin_lock(&log->l_cilp->xc_cil_lock);
672 list_splice_init(&tp->t_busy,
673 &log->l_cilp->xc_ctx->busy_extents);
674 spin_unlock(&log->l_cilp->xc_cil_lock);
675 }
676
677 tp->t_commit_lsn = *commit_lsn;
678 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
679 xfs_trans_unreserve_and_mod_sb(tp);
680
681 /*
682 * Once all the items of the transaction have been copied to the CIL,
683 * the items can be unlocked and freed.
684 *
685 * This needs to be done before we drop the CIL context lock because we
686 * have to update state in the log items and unlock them before they go
687 * to disk. If we don't, then the CIL checkpoint can race with us and
688 * we can run checkpoint completion before we've updated and unlocked
689 * the log items. This affects (at least) processing of stale buffers,
690 * inodes and EFIs.
691 */
692 xfs_trans_free_items(tp, *commit_lsn, 0);
693
694 /* check for background commit before unlock */
695 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
696 push = 1;
697
698 up_read(&log->l_cilp->xc_ctx_lock);
699
700 /*
701 * We need to push CIL every so often so we don't cache more than we
702 * can fit in the log. The limit really is that a checkpoint can't be
703 * more than half the log (the current checkpoint is not allowed to
704 * overwrite the previous checkpoint), but commit latency and memory
705 * usage limit this to a smaller size in most cases.
706 */
707 if (push)
708 xlog_cil_push(log, 0);
709 return 0;
710}
711
712/*
631 * Conditionally push the CIL based on the sequence passed in. 713 * Conditionally push the CIL based on the sequence passed in.
632 * 714 *
633 * We only need to push if we haven't already pushed the sequence 715 * We only need to push if we haven't already pushed the sequence
@@ -641,39 +723,34 @@ out_abort:
641 * commit lsn is there. It'll be empty, so this is broken for now. 723 * commit lsn is there. It'll be empty, so this is broken for now.
642 */ 724 */
643xfs_lsn_t 725xfs_lsn_t
644xlog_cil_push_lsn( 726xlog_cil_force_lsn(
645 struct log *log, 727 struct log *log,
646 xfs_lsn_t push_seq) 728 xfs_lsn_t sequence)
647{ 729{
648 struct xfs_cil *cil = log->l_cilp; 730 struct xfs_cil *cil = log->l_cilp;
649 struct xfs_cil_ctx *ctx; 731 struct xfs_cil_ctx *ctx;
650 xfs_lsn_t commit_lsn = NULLCOMMITLSN; 732 xfs_lsn_t commit_lsn = NULLCOMMITLSN;
651 733
652restart: 734 ASSERT(sequence <= cil->xc_current_sequence);
653 down_write(&cil->xc_ctx_lock); 735
654 ASSERT(push_seq <= cil->xc_ctx->sequence); 736 /*
655 737 * check to see if we need to force out the current context.
656 /* check to see if we need to force out the current context */ 738 * xlog_cil_push() handles racing pushes for the same sequence,
657 if (push_seq == cil->xc_ctx->sequence) { 739 * so no need to deal with it here.
658 up_write(&cil->xc_ctx_lock); 740 */
659 xlog_cil_push(log, 1); 741 if (sequence == cil->xc_current_sequence)
660 goto restart; 742 xlog_cil_push(log, sequence);
661 }
662 743
663 /* 744 /*
664 * See if we can find a previous sequence still committing. 745 * See if we can find a previous sequence still committing.
665 * We can drop the flush lock as soon as we have the cil lock
666 * because we are now only comparing contexts protected by
667 * the cil lock.
668 *
669 * We need to wait for all previous sequence commits to complete 746 * We need to wait for all previous sequence commits to complete
670 * before allowing the force of push_seq to go ahead. Hence block 747 * before allowing the force of push_seq to go ahead. Hence block
671 * on commits for those as well. 748 * on commits for those as well.
672 */ 749 */
750restart:
673 spin_lock(&cil->xc_cil_lock); 751 spin_lock(&cil->xc_cil_lock);
674 up_write(&cil->xc_ctx_lock);
675 list_for_each_entry(ctx, &cil->xc_committing, committing) { 752 list_for_each_entry(ctx, &cil->xc_committing, committing) {
676 if (ctx->sequence > push_seq) 753 if (ctx->sequence > sequence)
677 continue; 754 continue;
678 if (!ctx->commit_lsn) { 755 if (!ctx->commit_lsn) {
679 /* 756 /*
@@ -683,7 +760,7 @@ restart:
683 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 760 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
684 goto restart; 761 goto restart;
685 } 762 }
686 if (ctx->sequence != push_seq) 763 if (ctx->sequence != sequence)
687 continue; 764 continue;
688 /* found it! */ 765 /* found it! */
689 commit_lsn = ctx->commit_lsn; 766 commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,16 +422,17 @@ struct xfs_cil {
422 struct rw_semaphore xc_ctx_lock; 422 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing; 423 struct list_head xc_committing;
424 sv_t xc_commit_wait; 424 sv_t xc_commit_wait;
425 xfs_lsn_t xc_current_sequence;
425}; 426};
426 427
427/* 428/*
428 * The amount of log space we should the CIL to aggregate is difficult to size. 429 * The amount of log space we allow the CIL to aggregate is difficult to size.
429 * Whatever we chose we have to make we can get a reservation for the log space 430 * Whatever we choose, we have to make sure we can get a reservation for the
430 * effectively, that it is large enough to capture sufficient relogging to 431 * log space effectively, that it is large enough to capture sufficient
431 * reduce log buffer IO significantly, but it is not too large for the log or 432 * relogging to reduce log buffer IO significantly, but it is not too large for
432 * induces too much latency when writing out through the iclogs. We track both 433 * the log or induces too much latency when writing out through the iclogs. We
433 * space consumed and the number of vectors in the checkpoint context, so we 434 * track both space consumed and the number of vectors in the checkpoint
434 * need to decide which to use for limiting. 435 * context, so we need to decide which to use for limiting.
435 * 436 *
436 * Every log buffer we write out during a push needs a header reserved, which 437 * Every log buffer we write out during a push needs a header reserved, which
437 * is at least one sector and more for v2 logs. Hence we need a reservation of 438 * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -458,16 +459,21 @@ struct xfs_cil {
458 * checkpoint transaction ticket is specific to the checkpoint context, rather 459 * checkpoint transaction ticket is specific to the checkpoint context, rather
459 * than the CIL itself. 460 * than the CIL itself.
460 * 461 *
461 * With dynamic reservations, we can basically make up arbitrary limits for the 462 * With dynamic reservations, we can effectively make up arbitrary limits for
462 * checkpoint size so long as they don't violate any other size rules. Hence 463 * the checkpoint size so long as they don't violate any other size rules.
463 * the initial maximum size for the checkpoint transaction will be set to a 464 * Recovery imposes a rule that no transaction exceed half the log, so we are
464 * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit 465 * limited by that. Furthermore, the log transaction reservation subsystem
465 * right now based on the latency of writing out a large amount of data through 466 * tries to keep 25% of the log free, so we need to keep below that limit or we
466 * the circular iclog buffers. 467 * risk running out of free log space to start any new transactions.
468 *
469 * In order to keep background CIL push efficient, we will set a lower
470 * threshold at which background pushing is attempted without blocking current
471 * transaction commits. A separate, higher bound defines when CIL pushes are
472 * enforced to ensure we stay within our maximum checkpoint size bounds.
473 * threshold, yet give us plenty of space for aggregation on large logs.
467 */ 474 */
468 475#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
469#define XLOG_CIL_SPACE_LIMIT(log) \ 476#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
470 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
471 477
472/* 478/*
473 * The reservation head lsn is not made up of a cycle number and block number. 479 * The reservation head lsn is not made up of a cycle number and block number.
@@ -562,8 +568,16 @@ int xlog_cil_init(struct log *log);
562void xlog_cil_init_post_recovery(struct log *log); 568void xlog_cil_init_post_recovery(struct log *log);
563void xlog_cil_destroy(struct log *log); 569void xlog_cil_destroy(struct log *log);
564 570
565int xlog_cil_push(struct log *log, int push_now); 571/*
566xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); 572 * CIL force routines
573 */
574xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
575
576static inline void
577xlog_cil_force(struct log *log)
578{
579 xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
580}
567 581
568/* 582/*
569 * Unmount record type is used as a pseudo transaction type for the ticket. 583 * Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9ac5cfab27b9..966d3f97458c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,15 +24,11 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_error.h" 28#include "xfs_error.h"
31#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h" 32#include "xfs_dinode.h"
37#include "xfs_inode.h" 33#include "xfs_inode.h"
38#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
@@ -111,7 +107,8 @@ xlog_get_bp(
111 nbblks += log->l_sectBBsize; 107 nbblks += log->l_sectBBsize;
112 nbblks = round_up(nbblks, log->l_sectBBsize); 108 nbblks = round_up(nbblks, log->l_sectBBsize);
113 109
114 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 110 return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
111 BBTOB(nbblks), 0);
115} 112}
116 113
117STATIC void 114STATIC void
@@ -171,7 +168,7 @@ xlog_bread_noalign(
171 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 168 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
172 169
173 xfsbdstrat(log->l_mp, bp); 170 xfsbdstrat(log->l_mp, bp);
174 error = xfs_iowait(bp); 171 error = xfs_buf_iowait(bp);
175 if (error) 172 if (error)
176 xfs_ioerror_alert("xlog_bread", log->l_mp, 173 xfs_ioerror_alert("xlog_bread", log->l_mp,
177 bp, XFS_BUF_ADDR(bp)); 174 bp, XFS_BUF_ADDR(bp));
@@ -325,12 +322,13 @@ xlog_recover_iodone(
325 * this during recovery. One strike! 322 * this during recovery. One strike!
326 */ 323 */
327 xfs_ioerror_alert("xlog_recover_iodone", 324 xfs_ioerror_alert("xlog_recover_iodone",
328 bp->b_mount, bp, XFS_BUF_ADDR(bp)); 325 bp->b_target->bt_mount, bp,
329 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 326 XFS_BUF_ADDR(bp));
327 xfs_force_shutdown(bp->b_target->bt_mount,
328 SHUTDOWN_META_IO_ERROR);
330 } 329 }
331 bp->b_mount = NULL;
332 XFS_BUF_CLR_IODONE_FUNC(bp); 330 XFS_BUF_CLR_IODONE_FUNC(bp);
333 xfs_biodone(bp); 331 xfs_buf_ioend(bp, 0);
334} 332}
335 333
336/* 334/*
@@ -1565,9 +1563,7 @@ xlog_recover_reorder_trans(
1565 1563
1566 list_splice_init(&trans->r_itemq, &sort_list); 1564 list_splice_init(&trans->r_itemq, &sort_list);
1567 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1565 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1568 xfs_buf_log_format_t *buf_f; 1566 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1569
1570 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1571 1567
1572 switch (ITEM_TYPE(item)) { 1568 switch (ITEM_TYPE(item)) {
1573 case XFS_LI_BUF: 1569 case XFS_LI_BUF:
@@ -1892,9 +1888,8 @@ xlog_recover_do_inode_buffer(
1892 * current di_next_unlinked field. Extract its value 1888 * current di_next_unlinked field. Extract its value
1893 * and copy it to the buffer copy. 1889 * and copy it to the buffer copy.
1894 */ 1890 */
1895 logged_nextp = (xfs_agino_t *) 1891 logged_nextp = item->ri_buf[item_index].i_addr +
1896 ((char *)(item->ri_buf[item_index].i_addr) + 1892 next_unlinked_offset - reg_buf_offset;
1897 (next_unlinked_offset - reg_buf_offset));
1898 if (unlikely(*logged_nextp == 0)) { 1893 if (unlikely(*logged_nextp == 0)) {
1899 xfs_fs_cmn_err(CE_ALERT, mp, 1894 xfs_fs_cmn_err(CE_ALERT, mp,
1900 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1895 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
@@ -1973,8 +1968,7 @@ xlog_recover_do_reg_buffer(
1973 item->ri_buf[i].i_len, __func__); 1968 item->ri_buf[i].i_len, __func__);
1974 goto next; 1969 goto next;
1975 } 1970 }
1976 error = xfs_qm_dqcheck((xfs_disk_dquot_t *) 1971 error = xfs_qm_dqcheck(item->ri_buf[i].i_addr,
1977 item->ri_buf[i].i_addr,
1978 -1, 0, XFS_QMOPT_DOWARN, 1972 -1, 0, XFS_QMOPT_DOWARN,
1979 "dquot_buf_recover"); 1973 "dquot_buf_recover");
1980 if (error) 1974 if (error)
@@ -2187,7 +2181,7 @@ xlog_recover_do_buffer_trans(
2187 xlog_recover_item_t *item, 2181 xlog_recover_item_t *item,
2188 int pass) 2182 int pass)
2189{ 2183{
2190 xfs_buf_log_format_t *buf_f; 2184 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2191 xfs_mount_t *mp; 2185 xfs_mount_t *mp;
2192 xfs_buf_t *bp; 2186 xfs_buf_t *bp;
2193 int error; 2187 int error;
@@ -2197,8 +2191,6 @@ xlog_recover_do_buffer_trans(
2197 ushort flags; 2191 ushort flags;
2198 uint buf_flags; 2192 uint buf_flags;
2199 2193
2200 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2201
2202 if (pass == XLOG_RECOVER_PASS1) { 2194 if (pass == XLOG_RECOVER_PASS1) {
2203 /* 2195 /*
2204 * In this pass we're only looking for buf items 2196 * In this pass we're only looking for buf items
@@ -2285,8 +2277,7 @@ xlog_recover_do_buffer_trans(
2285 XFS_BUF_STALE(bp); 2277 XFS_BUF_STALE(bp);
2286 error = xfs_bwrite(mp, bp); 2278 error = xfs_bwrite(mp, bp);
2287 } else { 2279 } else {
2288 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2280 ASSERT(bp->b_target->bt_mount == mp);
2289 bp->b_mount = mp;
2290 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2281 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2291 xfs_bdwrite(mp, bp); 2282 xfs_bdwrite(mp, bp);
2292 } 2283 }
@@ -2319,10 +2310,9 @@ xlog_recover_do_inode_trans(
2319 } 2310 }
2320 2311
2321 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2312 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2322 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; 2313 in_f = item->ri_buf[0].i_addr;
2323 } else { 2314 } else {
2324 in_f = (xfs_inode_log_format_t *)kmem_alloc( 2315 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
2325 sizeof(xfs_inode_log_format_t), KM_SLEEP);
2326 need_free = 1; 2316 need_free = 1;
2327 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2317 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2328 if (error) 2318 if (error)
@@ -2370,7 +2360,7 @@ xlog_recover_do_inode_trans(
2370 error = EFSCORRUPTED; 2360 error = EFSCORRUPTED;
2371 goto error; 2361 goto error;
2372 } 2362 }
2373 dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); 2363 dicp = item->ri_buf[1].i_addr;
2374 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2364 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2375 xfs_buf_relse(bp); 2365 xfs_buf_relse(bp);
2376 xfs_fs_cmn_err(CE_ALERT, mp, 2366 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2461,7 +2451,7 @@ xlog_recover_do_inode_trans(
2461 } 2451 }
2462 2452
2463 /* The core is in in-core format */ 2453 /* The core is in in-core format */
2464 xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); 2454 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
2465 2455
2466 /* the rest is in on-disk format */ 2456 /* the rest is in on-disk format */
2467 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2457 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
@@ -2551,8 +2541,7 @@ xlog_recover_do_inode_trans(
2551 } 2541 }
2552 2542
2553write_inode_buffer: 2543write_inode_buffer:
2554 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2544 ASSERT(bp->b_target->bt_mount == mp);
2555 bp->b_mount = mp;
2556 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2545 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2557 xfs_bdwrite(mp, bp); 2546 xfs_bdwrite(mp, bp);
2558error: 2547error:
@@ -2578,7 +2567,7 @@ xlog_recover_do_quotaoff_trans(
2578 return (0); 2567 return (0);
2579 } 2568 }
2580 2569
2581 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; 2570 qoff_f = item->ri_buf[0].i_addr;
2582 ASSERT(qoff_f); 2571 ASSERT(qoff_f);
2583 2572
2584 /* 2573 /*
@@ -2622,9 +2611,8 @@ xlog_recover_do_dquot_trans(
2622 if (mp->m_qflags == 0) 2611 if (mp->m_qflags == 0)
2623 return (0); 2612 return (0);
2624 2613
2625 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; 2614 recddq = item->ri_buf[1].i_addr;
2626 2615 if (recddq == NULL) {
2627 if (item->ri_buf[1].i_addr == NULL) {
2628 cmn_err(CE_ALERT, 2616 cmn_err(CE_ALERT,
2629 "XFS: NULL dquot in %s.", __func__); 2617 "XFS: NULL dquot in %s.", __func__);
2630 return XFS_ERROR(EIO); 2618 return XFS_ERROR(EIO);
@@ -2654,7 +2642,7 @@ xlog_recover_do_dquot_trans(
2654 * The other possibility, of course, is that the quota subsystem was 2642 * The other possibility, of course, is that the quota subsystem was
2655 * removed since the last mount - ENOSYS. 2643 * removed since the last mount - ENOSYS.
2656 */ 2644 */
2657 dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; 2645 dq_f = item->ri_buf[0].i_addr;
2658 ASSERT(dq_f); 2646 ASSERT(dq_f);
2659 if ((error = xfs_qm_dqcheck(recddq, 2647 if ((error = xfs_qm_dqcheck(recddq,
2660 dq_f->qlf_id, 2648 dq_f->qlf_id,
@@ -2690,8 +2678,7 @@ xlog_recover_do_dquot_trans(
2690 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2678 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2691 2679
2692 ASSERT(dq_f->qlf_size == 2); 2680 ASSERT(dq_f->qlf_size == 2);
2693 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2681 ASSERT(bp->b_target->bt_mount == mp);
2694 bp->b_mount = mp;
2695 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2682 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2696 xfs_bdwrite(mp, bp); 2683 xfs_bdwrite(mp, bp);
2697 2684
@@ -2721,7 +2708,7 @@ xlog_recover_do_efi_trans(
2721 return 0; 2708 return 0;
2722 } 2709 }
2723 2710
2724 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; 2711 efi_formatp = item->ri_buf[0].i_addr;
2725 2712
2726 mp = log->l_mp; 2713 mp = log->l_mp;
2727 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2714 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
@@ -2767,7 +2754,7 @@ xlog_recover_do_efd_trans(
2767 return; 2754 return;
2768 } 2755 }
2769 2756
2770 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; 2757 efd_formatp = item->ri_buf[0].i_addr;
2771 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2758 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2772 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2759 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2773 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2760 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
@@ -3829,7 +3816,7 @@ xlog_do_recover(
3829 XFS_BUF_READ(bp); 3816 XFS_BUF_READ(bp);
3830 XFS_BUF_UNASYNC(bp); 3817 XFS_BUF_UNASYNC(bp);
3831 xfsbdstrat(log->l_mp, bp); 3818 xfsbdstrat(log->l_mp, bp);
3832 error = xfs_iowait(bp); 3819 error = xfs_buf_iowait(bp);
3833 if (error) { 3820 if (error) {
3834 xfs_ioerror_alert("xlog_do_recover", 3821 xfs_ioerror_alert("xlog_do_recover",
3835 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3822 log->l_mp, bp, XFS_BUF_ADDR(bp));
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 69f62d8b2816..b1498ab5a399 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -25,13 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 32#include "xfs_dinode.h"
36#include "xfs_inode.h" 33#include "xfs_inode.h"
37#include "xfs_btree.h" 34#include "xfs_btree.h"
@@ -55,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
55 int); 52 int);
56STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, 53STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
57 int); 54 int);
58STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
59 int64_t, int);
60STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 55STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
61
62#else 56#else
63 57
64#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 58#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
65#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) 59#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
66#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
67
68#endif 60#endif
69 61
70static const struct { 62static const struct {
@@ -202,6 +194,8 @@ xfs_uuid_unmount(
202 194
203/* 195/*
204 * Reference counting access wrappers to the perag structures. 196 * Reference counting access wrappers to the perag structures.
197 * Because we never free per-ag structures, the only thing we
198 * have to protect against changes is the tree structure itself.
205 */ 199 */
206struct xfs_perag * 200struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) 201xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -209,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
209 struct xfs_perag *pag; 203 struct xfs_perag *pag;
210 int ref = 0; 204 int ref = 0;
211 205
212 spin_lock(&mp->m_perag_lock); 206 rcu_read_lock();
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno); 207 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) { 208 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0); 209 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref); 210 ref = atomic_inc_return(&pag->pag_ref);
219 } 211 }
220 spin_unlock(&mp->m_perag_lock); 212 rcu_read_unlock();
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_); 213 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag; 214 return pag;
223} 215}
224 216
217/*
218 * search from @first to find the next perag with the given tag set.
219 */
220struct xfs_perag *
221xfs_perag_get_tag(
222 struct xfs_mount *mp,
223 xfs_agnumber_t first,
224 int tag)
225{
226 struct xfs_perag *pag;
227 int found;
228 int ref;
229
230 rcu_read_lock();
231 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
232 (void **)&pag, first, 1, tag);
233 if (found <= 0) {
234 rcu_read_unlock();
235 return NULL;
236 }
237 ref = atomic_inc_return(&pag->pag_ref);
238 rcu_read_unlock();
239 trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
240 return pag;
241}
242
225void 243void
226xfs_perag_put(struct xfs_perag *pag) 244xfs_perag_put(struct xfs_perag *pag)
227{ 245{
@@ -232,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag)
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); 250 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233} 251}
234 252
253STATIC void
254__xfs_free_perag(
255 struct rcu_head *head)
256{
257 struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
258
259 ASSERT(atomic_read(&pag->pag_ref) == 0);
260 kmem_free(pag);
261}
262
235/* 263/*
236 * Free up the resources associated with a mount structure. Assume that 264 * Free up the per-ag resources associated with the mount structure.
237 * the structure was initially zeroed, so we can tell which fields got
238 * initialized.
239 */ 265 */
240STATIC void 266STATIC void
241xfs_free_perag( 267xfs_free_perag(
@@ -247,10 +273,9 @@ xfs_free_perag(
247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 273 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
248 spin_lock(&mp->m_perag_lock); 274 spin_lock(&mp->m_perag_lock);
249 pag = radix_tree_delete(&mp->m_perag_tree, agno); 275 pag = radix_tree_delete(&mp->m_perag_tree, agno);
250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock); 276 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag); 277 ASSERT(pag);
278 call_rcu(&pag->rcu_head, __xfs_free_perag);
254 } 279 }
255} 280}
256 281
@@ -447,7 +472,10 @@ xfs_initialize_perag(
447 pag->pag_agno = index; 472 pag->pag_agno = index;
448 pag->pag_mount = mp; 473 pag->pag_mount = mp;
449 rwlock_init(&pag->pag_ici_lock); 474 rwlock_init(&pag->pag_ici_lock);
475 mutex_init(&pag->pag_ici_reclaim_lock);
450 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 476 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
477 spin_lock_init(&pag->pag_buf_lock);
478 pag->pag_buf_tree = RB_ROOT;
451 479
452 if (radix_tree_preload(GFP_NOFS)) 480 if (radix_tree_preload(GFP_NOFS))
453 goto out_unwind; 481 goto out_unwind;
@@ -642,7 +670,6 @@ int
642xfs_readsb(xfs_mount_t *mp, int flags) 670xfs_readsb(xfs_mount_t *mp, int flags)
643{ 671{
644 unsigned int sector_size; 672 unsigned int sector_size;
645 unsigned int extra_flags;
646 xfs_buf_t *bp; 673 xfs_buf_t *bp;
647 int error; 674 int error;
648 675
@@ -655,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags)
655 * access to the superblock. 682 * access to the superblock.
656 */ 683 */
657 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 684 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
658 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
659 685
660 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), 686reread:
661 extra_flags); 687 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
662 if (!bp || XFS_BUF_ISERROR(bp)) { 688 XFS_SB_DADDR, sector_size, 0);
663 xfs_fs_mount_cmn_err(flags, "SB read failed"); 689 if (!bp) {
664 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 690 xfs_fs_mount_cmn_err(flags, "SB buffer read failed");
665 goto fail; 691 return EIO;
666 } 692 }
667 ASSERT(XFS_BUF_ISBUSY(bp));
668 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
669 693
670 /* 694 /*
671 * Initialize the mount structure from the superblock. 695 * Initialize the mount structure from the superblock.
672 * But first do some basic consistency checking. 696 * But first do some basic consistency checking.
673 */ 697 */
674 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 698 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
675
676 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 699 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
677 if (error) { 700 if (error) {
678 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 701 xfs_fs_mount_cmn_err(flags, "SB validate failed");
679 goto fail; 702 goto release_buf;
680 } 703 }
681 704
682 /* 705 /*
@@ -687,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
687 "device supports only %u byte sectors (not %u)", 710 "device supports only %u byte sectors (not %u)",
688 sector_size, mp->m_sb.sb_sectsize); 711 sector_size, mp->m_sb.sb_sectsize);
689 error = ENOSYS; 712 error = ENOSYS;
690 goto fail; 713 goto release_buf;
691 } 714 }
692 715
693 /* 716 /*
@@ -695,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags)
695 * re-read the superblock so the buffer is correctly sized. 718 * re-read the superblock so the buffer is correctly sized.
696 */ 719 */
697 if (sector_size < mp->m_sb.sb_sectsize) { 720 if (sector_size < mp->m_sb.sb_sectsize) {
698 XFS_BUF_UNMANAGE(bp);
699 xfs_buf_relse(bp); 721 xfs_buf_relse(bp);
700 sector_size = mp->m_sb.sb_sectsize; 722 sector_size = mp->m_sb.sb_sectsize;
701 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, 723 goto reread;
702 BTOBB(sector_size), extra_flags);
703 if (!bp || XFS_BUF_ISERROR(bp)) {
704 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
705 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
706 goto fail;
707 }
708 ASSERT(XFS_BUF_ISBUSY(bp));
709 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
710 } 724 }
711 725
712 /* Initialize per-cpu counters */ 726 /* Initialize per-cpu counters */
713 xfs_icsb_reinit_counters(mp); 727 xfs_icsb_reinit_counters(mp);
714 728
715 mp->m_sb_bp = bp; 729 mp->m_sb_bp = bp;
716 xfs_buf_relse(bp); 730 xfs_buf_unlock(bp);
717 ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
718 return 0; 731 return 0;
719 732
720 fail: 733release_buf:
721 if (bp) { 734 xfs_buf_relse(bp);
722 XFS_BUF_UNMANAGE(bp);
723 xfs_buf_relse(bp);
724 }
725 return error; 735 return error;
726} 736}
727 737
@@ -994,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp)
994{ 1004{
995 xfs_buf_t *bp; 1005 xfs_buf_t *bp;
996 xfs_daddr_t d; 1006 xfs_daddr_t d;
997 int error;
998 1007
999 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 1008 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1000 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1009 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1001 cmn_err(CE_WARN, "XFS: size check 1 failed"); 1010 cmn_err(CE_WARN, "XFS: filesystem size mismatch detected");
1002 return XFS_ERROR(EFBIG); 1011 return XFS_ERROR(EFBIG);
1003 } 1012 }
1004 error = xfs_read_buf(mp, mp->m_ddev_targp, 1013 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
1005 d - XFS_FSS_TO_BB(mp, 1), 1014 d - XFS_FSS_TO_BB(mp, 1),
1006 XFS_FSS_TO_BB(mp, 1), 0, &bp); 1015 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
1007 if (!error) { 1016 if (!bp) {
1008 xfs_buf_relse(bp); 1017 cmn_err(CE_WARN, "XFS: last sector read failed");
1009 } else { 1018 return EIO;
1010 cmn_err(CE_WARN, "XFS: size check 2 failed");
1011 if (error == ENOSPC)
1012 error = XFS_ERROR(EFBIG);
1013 return error;
1014 } 1019 }
1020 xfs_buf_relse(bp);
1015 1021
1016 if (mp->m_logdev_targp != mp->m_ddev_targp) { 1022 if (mp->m_logdev_targp != mp->m_ddev_targp) {
1017 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1023 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1018 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1024 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1019 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1025 cmn_err(CE_WARN, "XFS: log size mismatch detected");
1020 return XFS_ERROR(EFBIG); 1026 return XFS_ERROR(EFBIG);
1021 } 1027 }
1022 error = xfs_read_buf(mp, mp->m_logdev_targp, 1028 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
1023 d - XFS_FSB_TO_BB(mp, 1), 1029 d - XFS_FSB_TO_BB(mp, 1),
1024 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1030 XFS_FSB_TO_B(mp, 1), 0);
1025 if (!error) { 1031 if (!bp) {
1026 xfs_buf_relse(bp); 1032 cmn_err(CE_WARN, "XFS: log device read failed");
1027 } else { 1033 return EIO;
1028 cmn_err(CE_WARN, "XFS: size check 3 failed");
1029 if (error == ENOSPC)
1030 error = XFS_ERROR(EFBIG);
1031 return error;
1032 } 1034 }
1035 xfs_buf_relse(bp);
1033 } 1036 }
1034 return 0; 1037 return 0;
1035} 1038}
@@ -1604,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1604 XFS_BUF_UNASYNC(sbp); 1607 XFS_BUF_UNASYNC(sbp);
1605 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1608 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
1606 xfsbdstrat(mp, sbp); 1609 xfsbdstrat(mp, sbp);
1607 error = xfs_iowait(sbp); 1610 error = xfs_buf_iowait(sbp);
1608 if (error) 1611 if (error)
1609 xfs_ioerror_alert("xfs_unmountfs_writesb", 1612 xfs_ioerror_alert("xfs_unmountfs_writesb",
1610 mp, sbp, XFS_BUF_ADDR(sbp)); 1613 mp, sbp, XFS_BUF_ADDR(sbp));
@@ -1835,135 +1838,72 @@ xfs_mod_incore_sb_unlocked(
1835 */ 1838 */
1836int 1839int
1837xfs_mod_incore_sb( 1840xfs_mod_incore_sb(
1838 xfs_mount_t *mp, 1841 struct xfs_mount *mp,
1839 xfs_sb_field_t field, 1842 xfs_sb_field_t field,
1840 int64_t delta, 1843 int64_t delta,
1841 int rsvd) 1844 int rsvd)
1842{ 1845{
1843 int status; 1846 int status;
1844 1847
1845 /* check for per-cpu counters */
1846 switch (field) {
1847#ifdef HAVE_PERCPU_SB 1848#ifdef HAVE_PERCPU_SB
1848 case XFS_SBS_ICOUNT: 1849 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
1849 case XFS_SBS_IFREE:
1850 case XFS_SBS_FDBLOCKS:
1851 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1852 status = xfs_icsb_modify_counters(mp, field,
1853 delta, rsvd);
1854 break;
1855 }
1856 /* FALLTHROUGH */
1857#endif 1850#endif
1858 default: 1851 spin_lock(&mp->m_sb_lock);
1859 spin_lock(&mp->m_sb_lock); 1852 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1860 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1853 spin_unlock(&mp->m_sb_lock);
1861 spin_unlock(&mp->m_sb_lock);
1862 break;
1863 }
1864 1854
1865 return status; 1855 return status;
1866} 1856}
1867 1857
1868/* 1858/*
1869 * xfs_mod_incore_sb_batch() is used to change more than one field 1859 * Change more than one field in the in-core superblock structure at a time.
1870 * in the in-core superblock structure at a time. This modification
1871 * is protected by a lock internal to this module. The fields and
1872 * changes to those fields are specified in the array of xfs_mod_sb
1873 * structures passed in.
1874 * 1860 *
1875 * Either all of the specified deltas will be applied or none of 1861 * The fields and changes to those fields are specified in the array of
1876 * them will. If any modified field dips below 0, then all modifications 1862 * xfs_mod_sb structures passed in. Either all of the specified deltas
1877 * will be backed out and EINVAL will be returned. 1863 * will be applied or none of them will. If any modified field dips below 0,
1864 * then all modifications will be backed out and EINVAL will be returned.
1865 *
1866 * Note that this function may not be used for the superblock values that
1867 * are tracked with the in-memory per-cpu counters - a direct call to
1868 * xfs_icsb_modify_counters is required for these.
1878 */ 1869 */
1879int 1870int
1880xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) 1871xfs_mod_incore_sb_batch(
1872 struct xfs_mount *mp,
1873 xfs_mod_sb_t *msb,
1874 uint nmsb,
1875 int rsvd)
1881{ 1876{
1882 int status=0; 1877 xfs_mod_sb_t *msbp = &msb[0];
1883 xfs_mod_sb_t *msbp; 1878 int error = 0;
1884 1879
1885 /* 1880 /*
1886 * Loop through the array of mod structures and apply each 1881 * Loop through the array of mod structures and apply each individually.
1887 * individually. If any fail, then back out all those 1882 * If any fail, then back out all those which have already been applied.
1888 * which have already been applied. Do all of this within 1883 * Do all of this within the scope of the m_sb_lock so that all of the
1889 * the scope of the m_sb_lock so that all of the changes will 1884 * changes will be atomic.
1890 * be atomic.
1891 */ 1885 */
1892 spin_lock(&mp->m_sb_lock); 1886 spin_lock(&mp->m_sb_lock);
1893 msbp = &msb[0];
1894 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1887 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
1895 /* 1888 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1896 * Apply the delta at index n. If it fails, break 1889 msbp->msb_field > XFS_SBS_FDBLOCKS);
1897 * from the loop so we'll fall into the undo loop
1898 * below.
1899 */
1900 switch (msbp->msb_field) {
1901#ifdef HAVE_PERCPU_SB
1902 case XFS_SBS_ICOUNT:
1903 case XFS_SBS_IFREE:
1904 case XFS_SBS_FDBLOCKS:
1905 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1906 spin_unlock(&mp->m_sb_lock);
1907 status = xfs_icsb_modify_counters(mp,
1908 msbp->msb_field,
1909 msbp->msb_delta, rsvd);
1910 spin_lock(&mp->m_sb_lock);
1911 break;
1912 }
1913 /* FALLTHROUGH */
1914#endif
1915 default:
1916 status = xfs_mod_incore_sb_unlocked(mp,
1917 msbp->msb_field,
1918 msbp->msb_delta, rsvd);
1919 break;
1920 }
1921 1890
1922 if (status != 0) { 1891 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
1923 break; 1892 msbp->msb_delta, rsvd);
1924 } 1893 if (error)
1894 goto unwind;
1925 } 1895 }
1896 spin_unlock(&mp->m_sb_lock);
1897 return 0;
1926 1898
1927 /* 1899unwind:
1928 * If we didn't complete the loop above, then back out 1900 while (--msbp >= msb) {
1929 * any changes made to the superblock. If you add code 1901 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
1930 * between the loop above and here, make sure that you 1902 -msbp->msb_delta, rsvd);
1931 * preserve the value of status. Loop back until 1903 ASSERT(error == 0);
1932 * we step below the beginning of the array. Make sure
1933 * we don't touch anything back there.
1934 */
1935 if (status != 0) {
1936 msbp--;
1937 while (msbp >= msb) {
1938 switch (msbp->msb_field) {
1939#ifdef HAVE_PERCPU_SB
1940 case XFS_SBS_ICOUNT:
1941 case XFS_SBS_IFREE:
1942 case XFS_SBS_FDBLOCKS:
1943 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1944 spin_unlock(&mp->m_sb_lock);
1945 status = xfs_icsb_modify_counters(mp,
1946 msbp->msb_field,
1947 -(msbp->msb_delta),
1948 rsvd);
1949 spin_lock(&mp->m_sb_lock);
1950 break;
1951 }
1952 /* FALLTHROUGH */
1953#endif
1954 default:
1955 status = xfs_mod_incore_sb_unlocked(mp,
1956 msbp->msb_field,
1957 -(msbp->msb_delta),
1958 rsvd);
1959 break;
1960 }
1961 ASSERT(status == 0);
1962 msbp--;
1963 }
1964 } 1904 }
1965 spin_unlock(&mp->m_sb_lock); 1905 spin_unlock(&mp->m_sb_lock);
1966 return status; 1906 return error;
1967} 1907}
1968 1908
1969/* 1909/*
@@ -2001,18 +1941,13 @@ xfs_getsb(
2001 */ 1941 */
2002void 1942void
2003xfs_freesb( 1943xfs_freesb(
2004 xfs_mount_t *mp) 1944 struct xfs_mount *mp)
2005{ 1945{
2006 xfs_buf_t *bp; 1946 struct xfs_buf *bp = mp->m_sb_bp;
2007 1947
2008 /* 1948 xfs_buf_lock(bp);
2009 * Use xfs_getsb() so that the buffer will be locked
2010 * when we call xfs_buf_relse().
2011 */
2012 bp = xfs_getsb(mp, 0);
2013 XFS_BUF_UNMANAGE(bp);
2014 xfs_buf_relse(bp);
2015 mp->m_sb_bp = NULL; 1949 mp->m_sb_bp = NULL;
1950 xfs_buf_relse(bp);
2016} 1951}
2017 1952
2018/* 1953/*
@@ -2499,7 +2434,7 @@ xfs_icsb_balance_counter(
2499 spin_unlock(&mp->m_sb_lock); 2434 spin_unlock(&mp->m_sb_lock);
2500} 2435}
2501 2436
2502STATIC int 2437int
2503xfs_icsb_modify_counters( 2438xfs_icsb_modify_counters(
2504 xfs_mount_t *mp, 2439 xfs_mount_t *mp,
2505 xfs_sb_field_t field, 2440 xfs_sb_field_t field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5761087ee8ea..5861b4980740 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations {
53 53
54#include "xfs_sync.h" 54#include "xfs_sync.h"
55 55
56struct cred;
57struct log; 56struct log;
58struct xfs_mount_args; 57struct xfs_mount_args;
59struct xfs_inode; 58struct xfs_inode;
@@ -66,65 +65,6 @@ struct xfs_nameops;
66struct xfs_ail; 65struct xfs_ail;
67struct xfs_quotainfo; 66struct xfs_quotainfo;
68 67
69
70/*
71 * Prototypes and functions for the Data Migration subsystem.
72 */
73
74typedef int (*xfs_send_data_t)(int, struct xfs_inode *,
75 xfs_off_t, size_t, int, int *);
76typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
77typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t,
81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
84 char *, char *);
85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
86 dm_right_t, mode_t, int, int);
87
88typedef struct xfs_dmops {
89 xfs_send_data_t xfs_send_data;
90 xfs_send_mmap_t xfs_send_mmap;
91 xfs_send_destroy_t xfs_send_destroy;
92 xfs_send_namesp_t xfs_send_namesp;
93 xfs_send_mount_t xfs_send_mount;
94 xfs_send_unmount_t xfs_send_unmount;
95} xfs_dmops_t;
96
97#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
98 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
99
100#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
101 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
102#define XFS_SEND_MMAP(mp, vma,fl) \
103 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
104#define XFS_SEND_DESTROY(mp, ip,right) \
105 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
106#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
107 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
108#define XFS_SEND_MOUNT(mp,right,path,name) \
109 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
110#define XFS_SEND_PREUNMOUNT(mp) \
111do { \
112 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
113 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 (mp)->m_rootip, DM_RIGHT_NULL, \
116 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
117 } \
118} while (0)
119#define XFS_SEND_UNMOUNT(mp) \
120do { \
121 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
122 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
123 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
124 } \
125} while (0)
126
127
128#ifdef HAVE_PERCPU_SB 68#ifdef HAVE_PERCPU_SB
129 69
130/* 70/*
@@ -150,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *);
150extern void xfs_icsb_destroy_counters(struct xfs_mount *); 90extern void xfs_icsb_destroy_counters(struct xfs_mount *);
151extern void xfs_icsb_sync_counters(struct xfs_mount *, int); 91extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
152extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); 92extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
93extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
94 int64_t, int);
153 95
154#else 96#else
155#define xfs_icsb_init_counters(mp) (0) 97#define xfs_icsb_init_counters(mp) (0)
@@ -157,6 +99,8 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
157#define xfs_icsb_reinit_counters(mp) do { } while (0) 99#define xfs_icsb_reinit_counters(mp) do { } while (0)
158#define xfs_icsb_sync_counters(mp, flags) do { } while (0) 100#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
159#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 101#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
102#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
103 xfs_mod_incore_sb(mp, field, delta, rsvd)
160#endif 104#endif
161 105
162typedef struct xfs_mount { 106typedef struct xfs_mount {
@@ -241,8 +185,6 @@ typedef struct xfs_mount {
241 uint m_chsize; /* size of next field */ 185 uint m_chsize; /* size of next field */
242 struct xfs_chash *m_chash; /* fs private inode per-cluster 186 struct xfs_chash *m_chash; /* fs private inode per-cluster
243 * hash table */ 187 * hash table */
244 struct xfs_dmops *m_dm_ops; /* vector of DMI ops */
245 struct xfs_qmops *m_qm_ops; /* vector of XQM ops */
246 atomic_t m_active_trans; /* number trans frozen */ 188 atomic_t m_active_trans; /* number trans frozen */
247#ifdef HAVE_PERCPU_SB 189#ifdef HAVE_PERCPU_SB
248 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ 190 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
@@ -269,7 +211,6 @@ typedef struct xfs_mount {
269 must be synchronous except 211 must be synchronous except
270 for space allocations */ 212 for space allocations */
271#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ 213#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
272#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
273#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 214#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
274#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 215#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
275 operations, typically for 216 operations, typically for
@@ -282,8 +223,6 @@ typedef struct xfs_mount {
282#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ 223#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
283#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 224#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
284#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ 225#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
285#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */
286 /* osyncisdsync is now default*/
287#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above 226#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above
288 * 32 bits in size */ 227 * 32 bits in size */
289#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */ 228#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */
@@ -296,8 +235,6 @@ typedef struct xfs_mount {
296#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ 235#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
297#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred 236#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
298 * I/O size in stat() */ 237 * I/O size in stat() */
299#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
300 counters */
301#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams 238#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
302 allocator */ 239 allocator */
303#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ 240#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
@@ -391,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
391 * perag get/put wrappers for ref counting 328 * perag get/put wrappers for ref counting
392 */ 329 */
393struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); 330struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
331struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
332 int tag);
394void xfs_perag_put(struct xfs_perag *pag); 333void xfs_perag_put(struct xfs_perag *pag);
395 334
396/* 335/*
@@ -440,11 +379,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
440 379
441extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 380extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
442 381
443extern int xfs_dmops_get(struct xfs_mount *);
444extern void xfs_dmops_put(struct xfs_mount *);
445
446extern struct xfs_dmops xfs_dmcore_xfs;
447
448#endif /* __KERNEL__ */ 382#endif /* __KERNEL__ */
449 383
450extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 384extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h
deleted file mode 100644
index 2dec79edb510..000000000000
--- a/fs/xfs/xfs_refcache.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_REFCACHE_H__
19#define __XFS_REFCACHE_H__
20
21#ifdef HAVE_REFCACHE
22/*
23 * Maximum size (in inodes) for the NFS reference cache
24 */
25#define XFS_REFCACHE_SIZE_MAX 512
26
27struct xfs_inode;
28struct xfs_mount;
29
30extern void xfs_refcache_insert(struct xfs_inode *);
31extern void xfs_refcache_purge_ip(struct xfs_inode *);
32extern void xfs_refcache_purge_mp(struct xfs_mount *);
33extern void xfs_refcache_purge_some(struct xfs_mount *);
34extern void xfs_refcache_resize(int);
35extern void xfs_refcache_destroy(void);
36
37extern void xfs_refcache_iunlock(struct xfs_inode *, uint);
38
39#else
40
41#define xfs_refcache_insert(ip) do { } while (0)
42#define xfs_refcache_purge_ip(ip) do { } while (0)
43#define xfs_refcache_purge_mp(mp) do { } while (0)
44#define xfs_refcache_purge_some(mp) do { } while (0)
45#define xfs_refcache_resize(size) do { } while (0)
46#define xfs_refcache_destroy() do { } while (0)
47
48#define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags)
49
50#endif
51
52#endif /* __XFS_REFCACHE_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index fc1cda23b817..d2af0a8381a6 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -24,12 +24,9 @@
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h" 26#include "xfs_dir2.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
@@ -116,20 +113,7 @@ xfs_rename(
116 int spaceres; 113 int spaceres;
117 int num_inodes; 114 int num_inodes;
118 115
119 xfs_itrace_entry(src_dp); 116 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
120 xfs_itrace_entry(target_dp);
121
122 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
123 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
124 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
125 src_dp, DM_RIGHT_NULL,
126 target_dp, DM_RIGHT_NULL,
127 src_name->name, target_name->name,
128 0, 0, 0);
129 if (error)
130 return error;
131 }
132 /* Return through std_return after this point. */
133 117
134 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
135 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 119 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
@@ -184,26 +168,14 @@ xfs_rename(
184 /* 168 /*
185 * Join all the inodes to the transaction. From this point on, 169 * Join all the inodes to the transaction. From this point on,
186 * we can rely on either trans_commit or trans_cancel to unlock 170 * we can rely on either trans_commit or trans_cancel to unlock
187 * them. Note that we need to add a vnode reference to the 171 * them.
188 * directories since trans_commit & trans_cancel will decrement
189 * them when they unlock the inodes. Also, we need to be careful
190 * not to add an inode to the transaction more than once.
191 */ 172 */
192 IHOLD(src_dp); 173 xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL);
193 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 174 if (new_parent)
194 175 xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL);
195 if (new_parent) { 176 xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL);
196 IHOLD(target_dp); 177 if (target_ip)
197 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 178 xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL);
198 }
199
200 IHOLD(src_ip);
201 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
202
203 if (target_ip) {
204 IHOLD(target_ip);
205 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
206 }
207 179
208 /* 180 /*
209 * If we are using project inheritance, we only allow renames 181 * If we are using project inheritance, we only allow renames
@@ -211,7 +183,7 @@ xfs_rename(
211 * tree quota mechanism would be circumvented. 183 * tree quota mechanism would be circumvented.
212 */ 184 */
213 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 185 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
214 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { 186 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
215 error = XFS_ERROR(EXDEV); 187 error = XFS_ERROR(EXDEV);
216 goto error_return; 188 goto error_return;
217 } 189 }
@@ -239,7 +211,9 @@ xfs_rename(
239 goto error_return; 211 goto error_return;
240 if (error) 212 if (error)
241 goto abort_return; 213 goto abort_return;
242 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 214
215 xfs_trans_ichgtime(tp, target_dp,
216 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
243 217
244 if (new_parent && src_is_directory) { 218 if (new_parent && src_is_directory) {
245 error = xfs_bumplink(tp, target_dp); 219 error = xfs_bumplink(tp, target_dp);
@@ -277,7 +251,9 @@ xfs_rename(
277 &first_block, &free_list, spaceres); 251 &first_block, &free_list, spaceres);
278 if (error) 252 if (error)
279 goto abort_return; 253 goto abort_return;
280 xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 254
255 xfs_trans_ichgtime(tp, target_dp,
256 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
281 257
282 /* 258 /*
283 * Decrement the link count on the target since the target 259 * Decrement the link count on the target since the target
@@ -320,7 +296,7 @@ xfs_rename(
320 * inode isn't really being changed, but old unix file systems did 296 * inode isn't really being changed, but old unix file systems did
321 * it and some incremental backup programs won't work without it. 297 * it and some incremental backup programs won't work without it.
322 */ 298 */
323 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
324 300
325 /* 301 /*
326 * Adjust the link count on src_dp. This is necessary when 302 * Adjust the link count on src_dp. This is necessary when
@@ -343,7 +319,7 @@ xfs_rename(
343 if (error) 319 if (error)
344 goto abort_return; 320 goto abort_return;
345 321
346 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 322 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
347 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 323 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
348 if (new_parent) 324 if (new_parent)
349 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 325 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
@@ -369,26 +345,13 @@ xfs_rename(
369 * trans_commit will unlock src_ip, target_ip & decrement 345 * trans_commit will unlock src_ip, target_ip & decrement
370 * the vnode references. 346 * the vnode references.
371 */ 347 */
372 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 348 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
373
374 /* Fall through to std_return with error = 0 or errno from
375 * xfs_trans_commit */
376std_return:
377 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
378 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
379 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
380 src_dp, DM_RIGHT_NULL,
381 target_dp, DM_RIGHT_NULL,
382 src_name->name, target_name->name,
383 0, error, 0);
384 }
385 return error;
386 349
387 abort_return: 350 abort_return:
388 cancel_flags |= XFS_TRANS_ABORT; 351 cancel_flags |= XFS_TRANS_ABORT;
389 /* FALLTHROUGH */
390 error_return: 352 error_return:
391 xfs_bmap_cancel(&free_list); 353 xfs_bmap_cancel(&free_list);
392 xfs_trans_cancel(tp, cancel_flags); 354 xfs_trans_cancel(tp, cancel_flags);
393 goto std_return; 355 std_return:
356 return error;
394} 357}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a2d32ce335aa..12a191385310 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -25,17 +25,10 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 30#include "xfs_dinode.h"
36#include "xfs_inode.h" 31#include "xfs_inode.h"
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 32#include "xfs_alloc.h"
40#include "xfs_bmap.h" 33#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 34#include "xfs_rtalloc.h"
@@ -46,6 +39,7 @@
46#include "xfs_trans_space.h" 39#include "xfs_trans_space.h"
47#include "xfs_utils.h" 40#include "xfs_utils.h"
48#include "xfs_trace.h" 41#include "xfs_trace.h"
42#include "xfs_buf.h"
49 43
50 44
51/* 45/*
@@ -129,7 +123,7 @@ xfs_growfs_rt_alloc(
129 cancelflags |= XFS_TRANS_ABORT; 123 cancelflags |= XFS_TRANS_ABORT;
130 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, 124 error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
131 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, 125 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
132 resblks, &map, &nmap, &flist, NULL); 126 resblks, &map, &nmap, &flist);
133 if (!error && nmap < 1) 127 if (!error && nmap < 1)
134 error = XFS_ERROR(ENOSPC); 128 error = XFS_ERROR(ENOSPC);
135 if (error) 129 if (error)
@@ -1890,13 +1884,13 @@ xfs_growfs_rt(
1890 /* 1884 /*
1891 * Read in the last block of the device, make sure it exists. 1885 * Read in the last block of the device, make sure it exists.
1892 */ 1886 */
1893 error = xfs_read_buf(mp, mp->m_rtdev_targp, 1887 bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
1894 XFS_FSB_TO_BB(mp, nrblocks - 1), 1888 XFS_FSB_TO_BB(mp, nrblocks - 1),
1895 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1889 XFS_FSB_TO_B(mp, 1), 0);
1896 if (error) 1890 if (!bp)
1897 return error; 1891 return EIO;
1898 ASSERT(bp);
1899 xfs_buf_relse(bp); 1892 xfs_buf_relse(bp);
1893
1900 /* 1894 /*
1901 * Calculate new parameters. These are the final values to be reached. 1895 * Calculate new parameters. These are the final values to be reached.
1902 */ 1896 */
@@ -2222,7 +2216,6 @@ xfs_rtmount_init(
2222{ 2216{
2223 xfs_buf_t *bp; /* buffer for last block of subvolume */ 2217 xfs_buf_t *bp; /* buffer for last block of subvolume */
2224 xfs_daddr_t d; /* address of last block of subvolume */ 2218 xfs_daddr_t d; /* address of last block of subvolume */
2225 int error; /* error return value */
2226 xfs_sb_t *sbp; /* filesystem superblock copy in mount */ 2219 xfs_sb_t *sbp; /* filesystem superblock copy in mount */
2227 2220
2228 sbp = &mp->m_sb; 2221 sbp = &mp->m_sb;
@@ -2249,15 +2242,12 @@ xfs_rtmount_init(
2249 (unsigned long long) mp->m_sb.sb_rblocks); 2242 (unsigned long long) mp->m_sb.sb_rblocks);
2250 return XFS_ERROR(EFBIG); 2243 return XFS_ERROR(EFBIG);
2251 } 2244 }
2252 error = xfs_read_buf(mp, mp->m_rtdev_targp, 2245 bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
2253 d - XFS_FSB_TO_BB(mp, 1), 2246 d - XFS_FSB_TO_BB(mp, 1),
2254 XFS_FSB_TO_BB(mp, 1), 0, &bp); 2247 XFS_FSB_TO_B(mp, 1), 0);
2255 if (error) { 2248 if (!bp) {
2256 cmn_err(CE_WARN, 2249 cmn_err(CE_WARN, "XFS: realtime device size check failed");
2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); 2250 return EIO;
2258 if (error == ENOSPC)
2259 return XFS_ERROR(EFBIG);
2260 return error;
2261 } 2251 }
2262 xfs_buf_relse(bp); 2252 xfs_buf_relse(bp);
2263 return 0; 2253 return 0;
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index e336742a58a4..56861d5daaef 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -24,27 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 29#include "xfs_dinode.h"
36#include "xfs_inode.h" 30#include "xfs_inode.h"
37#include "xfs_inode_item.h"
38#include "xfs_itable.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_attr.h"
43#include "xfs_bmap.h"
44#include "xfs_error.h" 31#include "xfs_error.h"
45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 32#include "xfs_rw.h"
47#include "xfs_trace.h"
48 33
49/* 34/*
50 * Force a shutdown of the filesystem instantly while keeping 35 * Force a shutdown of the filesystem instantly while keeping
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1b017c657494..1eb2ba586814 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -80,10 +80,12 @@ struct xfs_mount;
80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ 82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */
83#define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */
83 84
84#define XFS_SB_VERSION2_OKREALFBITS \ 85#define XFS_SB_VERSION2_OKREALFBITS \
85 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ 86 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
86 XFS_SB_VERSION2_ATTR2BIT) 87 XFS_SB_VERSION2_ATTR2BIT | \
88 XFS_SB_VERSION2_PROJID32BIT)
87#define XFS_SB_VERSION2_OKSASHFBITS \ 89#define XFS_SB_VERSION2_OKSASHFBITS \
88 (0) 90 (0)
89#define XFS_SB_VERSION2_OKREALBITS \ 91#define XFS_SB_VERSION2_OKREALBITS \
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
495 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; 497 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
496} 498}
497 499
500static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
501{
502 return xfs_sb_version_hasmorebits(sbp) &&
503 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT);
504}
505
498/* 506/*
499 * end of superblock version macros 507 * end of superblock version macros
500 */ 508 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28547dfce037..f6d956b7711e 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (C) 2010 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,16 +25,12 @@
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 34#include "xfs_dinode.h"
38#include "xfs_inode.h" 35#include "xfs_inode.h"
39#include "xfs_btree.h" 36#include "xfs_btree.h"
@@ -47,6 +44,7 @@
47#include "xfs_trace.h" 44#include "xfs_trace.h"
48 45
49kmem_zone_t *xfs_trans_zone; 46kmem_zone_t *xfs_trans_zone;
47kmem_zone_t *xfs_log_item_desc_zone;
50 48
51 49
52/* 50/*
@@ -597,8 +595,7 @@ _xfs_trans_alloc(
597 tp->t_magic = XFS_TRANS_MAGIC; 595 tp->t_magic = XFS_TRANS_MAGIC;
598 tp->t_type = type; 596 tp->t_type = type;
599 tp->t_mountp = mp; 597 tp->t_mountp = mp;
600 tp->t_items_free = XFS_LIC_NUM_SLOTS; 598 INIT_LIST_HEAD(&tp->t_items);
601 xfs_lic_init(&(tp->t_items));
602 INIT_LIST_HEAD(&tp->t_busy); 599 INIT_LIST_HEAD(&tp->t_busy);
603 return tp; 600 return tp;
604} 601}
@@ -643,8 +640,7 @@ xfs_trans_dup(
643 ntp->t_magic = XFS_TRANS_MAGIC; 640 ntp->t_magic = XFS_TRANS_MAGIC;
644 ntp->t_type = tp->t_type; 641 ntp->t_type = tp->t_type;
645 ntp->t_mountp = tp->t_mountp; 642 ntp->t_mountp = tp->t_mountp;
646 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 643 INIT_LIST_HEAD(&ntp->t_items);
647 xfs_lic_init(&(ntp->t_items));
648 INIT_LIST_HEAD(&ntp->t_busy); 644 INIT_LIST_HEAD(&ntp->t_busy);
649 645
650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 646 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -700,7 +696,7 @@ xfs_trans_reserve(
700 * fail if the count would go below zero. 696 * fail if the count would go below zero.
701 */ 697 */
702 if (blocks > 0) { 698 if (blocks > 0) {
703 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 699 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
704 -((int64_t)blocks), rsvd); 700 -((int64_t)blocks), rsvd);
705 if (error != 0) { 701 if (error != 0) {
706 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 702 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -771,7 +767,7 @@ undo_log:
771 767
772undo_blocks: 768undo_blocks:
773 if (blocks > 0) { 769 if (blocks > 0) {
774 (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 770 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
775 (int64_t)blocks, rsvd); 771 (int64_t)blocks, rsvd);
776 tp->t_blk_res = 0; 772 tp->t_blk_res = 0;
777 } 773 }
@@ -1013,7 +1009,7 @@ void
1013xfs_trans_unreserve_and_mod_sb( 1009xfs_trans_unreserve_and_mod_sb(
1014 xfs_trans_t *tp) 1010 xfs_trans_t *tp)
1015{ 1011{
1016 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 1012 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */
1017 xfs_mod_sb_t *msbp; 1013 xfs_mod_sb_t *msbp;
1018 xfs_mount_t *mp = tp->t_mountp; 1014 xfs_mount_t *mp = tp->t_mountp;
1019 /* REFERENCED */ 1015 /* REFERENCED */
@@ -1021,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb(
1021 int rsvd; 1017 int rsvd;
1022 int64_t blkdelta = 0; 1018 int64_t blkdelta = 0;
1023 int64_t rtxdelta = 0; 1019 int64_t rtxdelta = 0;
1020 int64_t idelta = 0;
1021 int64_t ifreedelta = 0;
1024 1022
1025 msbp = msb; 1023 msbp = msb;
1026 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 1024 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
1027 1025
1028 /* calculate free blocks delta */ 1026 /* calculate deltas */
1029 if (tp->t_blk_res > 0) 1027 if (tp->t_blk_res > 0)
1030 blkdelta = tp->t_blk_res; 1028 blkdelta = tp->t_blk_res;
1031
1032 if ((tp->t_fdblocks_delta != 0) && 1029 if ((tp->t_fdblocks_delta != 0) &&
1033 (xfs_sb_version_haslazysbcount(&mp->m_sb) || 1030 (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1034 (tp->t_flags & XFS_TRANS_SB_DIRTY))) 1031 (tp->t_flags & XFS_TRANS_SB_DIRTY)))
1035 blkdelta += tp->t_fdblocks_delta; 1032 blkdelta += tp->t_fdblocks_delta;
1036 1033
1037 if (blkdelta != 0) {
1038 msbp->msb_field = XFS_SBS_FDBLOCKS;
1039 msbp->msb_delta = blkdelta;
1040 msbp++;
1041 }
1042
1043 /* calculate free realtime extents delta */
1044 if (tp->t_rtx_res > 0) 1034 if (tp->t_rtx_res > 0)
1045 rtxdelta = tp->t_rtx_res; 1035 rtxdelta = tp->t_rtx_res;
1046
1047 if ((tp->t_frextents_delta != 0) && 1036 if ((tp->t_frextents_delta != 0) &&
1048 (tp->t_flags & XFS_TRANS_SB_DIRTY)) 1037 (tp->t_flags & XFS_TRANS_SB_DIRTY))
1049 rtxdelta += tp->t_frextents_delta; 1038 rtxdelta += tp->t_frextents_delta;
1050 1039
1040 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1041 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
1042 idelta = tp->t_icount_delta;
1043 ifreedelta = tp->t_ifree_delta;
1044 }
1045
1046 /* apply the per-cpu counters */
1047 if (blkdelta) {
1048 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
1049 blkdelta, rsvd);
1050 if (error)
1051 goto out;
1052 }
1053
1054 if (idelta) {
1055 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
1056 idelta, rsvd);
1057 if (error)
1058 goto out_undo_fdblocks;
1059 }
1060
1061 if (ifreedelta) {
1062 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
1063 ifreedelta, rsvd);
1064 if (error)
1065 goto out_undo_icount;
1066 }
1067
1068 /* apply remaining deltas */
1051 if (rtxdelta != 0) { 1069 if (rtxdelta != 0) {
1052 msbp->msb_field = XFS_SBS_FREXTENTS; 1070 msbp->msb_field = XFS_SBS_FREXTENTS;
1053 msbp->msb_delta = rtxdelta; 1071 msbp->msb_delta = rtxdelta;
1054 msbp++; 1072 msbp++;
1055 } 1073 }
1056 1074
1057 /* apply remaining deltas */
1058
1059 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
1060 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
1061 if (tp->t_icount_delta != 0) {
1062 msbp->msb_field = XFS_SBS_ICOUNT;
1063 msbp->msb_delta = tp->t_icount_delta;
1064 msbp++;
1065 }
1066 if (tp->t_ifree_delta != 0) {
1067 msbp->msb_field = XFS_SBS_IFREE;
1068 msbp->msb_delta = tp->t_ifree_delta;
1069 msbp++;
1070 }
1071 }
1072
1073 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 1075 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
1074 if (tp->t_dblocks_delta != 0) { 1076 if (tp->t_dblocks_delta != 0) {
1075 msbp->msb_field = XFS_SBS_DBLOCKS; 1077 msbp->msb_field = XFS_SBS_DBLOCKS;
@@ -1119,7 +1121,125 @@ xfs_trans_unreserve_and_mod_sb(
1119 if (msbp > msb) { 1121 if (msbp > msb) {
1120 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, 1122 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
1121 (uint)(msbp - msb), rsvd); 1123 (uint)(msbp - msb), rsvd);
1122 ASSERT(error == 0); 1124 if (error)
1125 goto out_undo_ifreecount;
1126 }
1127
1128 return;
1129
1130out_undo_ifreecount:
1131 if (ifreedelta)
1132 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
1133out_undo_icount:
1134 if (idelta)
1135 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
1136out_undo_fdblocks:
1137 if (blkdelta)
1138 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
1139out:
1140 ASSERT(error == 0);
1141 return;
1142}
1143
1144/*
1145 * Add the given log item to the transaction's list of log items.
1146 *
1147 * The log item will now point to its new descriptor with its li_desc field.
1148 */
1149void
1150xfs_trans_add_item(
1151 struct xfs_trans *tp,
1152 struct xfs_log_item *lip)
1153{
1154 struct xfs_log_item_desc *lidp;
1155
1156 ASSERT(lip->li_mountp == tp->t_mountp);
1157 ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
1158
1159 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
1160
1161 lidp->lid_item = lip;
1162 lidp->lid_flags = 0;
1163 lidp->lid_size = 0;
1164 list_add_tail(&lidp->lid_trans, &tp->t_items);
1165
1166 lip->li_desc = lidp;
1167}
1168
1169STATIC void
1170xfs_trans_free_item_desc(
1171 struct xfs_log_item_desc *lidp)
1172{
1173 list_del_init(&lidp->lid_trans);
1174 kmem_zone_free(xfs_log_item_desc_zone, lidp);
1175}
1176
1177/*
1178 * Unlink and free the given descriptor.
1179 */
1180void
1181xfs_trans_del_item(
1182 struct xfs_log_item *lip)
1183{
1184 xfs_trans_free_item_desc(lip->li_desc);
1185 lip->li_desc = NULL;
1186}
1187
1188/*
1189 * Unlock all of the items of a transaction and free all the descriptors
1190 * of that transaction.
1191 */
1192void
1193xfs_trans_free_items(
1194 struct xfs_trans *tp,
1195 xfs_lsn_t commit_lsn,
1196 int flags)
1197{
1198 struct xfs_log_item_desc *lidp, *next;
1199
1200 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1201 struct xfs_log_item *lip = lidp->lid_item;
1202
1203 lip->li_desc = NULL;
1204
1205 if (commit_lsn != NULLCOMMITLSN)
1206 IOP_COMMITTING(lip, commit_lsn);
1207 if (flags & XFS_TRANS_ABORT)
1208 lip->li_flags |= XFS_LI_ABORTED;
1209 IOP_UNLOCK(lip);
1210
1211 xfs_trans_free_item_desc(lidp);
1212 }
1213}
1214
1215/*
1216 * Unlock the items associated with a transaction.
1217 *
1218 * Items which were not logged should be freed. Those which were logged must
1219 * still be tracked so they can be unpinned when the transaction commits.
1220 */
1221STATIC void
1222xfs_trans_unlock_items(
1223 struct xfs_trans *tp,
1224 xfs_lsn_t commit_lsn)
1225{
1226 struct xfs_log_item_desc *lidp, *next;
1227
1228 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1229 struct xfs_log_item *lip = lidp->lid_item;
1230
1231 lip->li_desc = NULL;
1232
1233 if (commit_lsn != NULLCOMMITLSN)
1234 IOP_COMMITTING(lip, commit_lsn);
1235 IOP_UNLOCK(lip);
1236
1237 /*
1238 * Free the descriptor if the item is not dirty
1239 * within this transaction.
1240 */
1241 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1242 xfs_trans_free_item_desc(lidp);
1123 } 1243 }
1124} 1244}
1125 1245
@@ -1134,30 +1254,27 @@ xfs_trans_count_vecs(
1134 struct xfs_trans *tp) 1254 struct xfs_trans *tp)
1135{ 1255{
1136 int nvecs; 1256 int nvecs;
1137 xfs_log_item_desc_t *lidp; 1257 struct xfs_log_item_desc *lidp;
1138 1258
1139 nvecs = 1; 1259 nvecs = 1;
1140 lidp = xfs_trans_first_item(tp);
1141 ASSERT(lidp != NULL);
1142 1260
1143 /* In the non-debug case we need to start bailing out if we 1261 /* In the non-debug case we need to start bailing out if we
1144 * didn't find a log_item here, return zero and let trans_commit 1262 * didn't find a log_item here, return zero and let trans_commit
1145 * deal with it. 1263 * deal with it.
1146 */ 1264 */
1147 if (lidp == NULL) 1265 if (list_empty(&tp->t_items)) {
1266 ASSERT(0);
1148 return 0; 1267 return 0;
1268 }
1149 1269
1150 while (lidp != NULL) { 1270 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1151 /* 1271 /*
1152 * Skip items which aren't dirty in this transaction. 1272 * Skip items which aren't dirty in this transaction.
1153 */ 1273 */
1154 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1274 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1155 lidp = xfs_trans_next_item(tp, lidp);
1156 continue; 1275 continue;
1157 }
1158 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1276 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1159 nvecs += lidp->lid_size; 1277 nvecs += lidp->lid_size;
1160 lidp = xfs_trans_next_item(tp, lidp);
1161 } 1278 }
1162 1279
1163 return nvecs; 1280 return nvecs;
@@ -1177,7 +1294,7 @@ xfs_trans_fill_vecs(
1177 struct xfs_trans *tp, 1294 struct xfs_trans *tp,
1178 struct xfs_log_iovec *log_vector) 1295 struct xfs_log_iovec *log_vector)
1179{ 1296{
1180 xfs_log_item_desc_t *lidp; 1297 struct xfs_log_item_desc *lidp;
1181 struct xfs_log_iovec *vecp; 1298 struct xfs_log_iovec *vecp;
1182 uint nitems; 1299 uint nitems;
1183 1300
@@ -1188,14 +1305,11 @@ xfs_trans_fill_vecs(
1188 vecp = log_vector + 1; 1305 vecp = log_vector + 1;
1189 1306
1190 nitems = 0; 1307 nitems = 0;
1191 lidp = xfs_trans_first_item(tp); 1308 ASSERT(!list_empty(&tp->t_items));
1192 ASSERT(lidp); 1309 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1193 while (lidp) {
1194 /* Skip items which aren't dirty in this transaction. */ 1310 /* Skip items which aren't dirty in this transaction. */
1195 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1311 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1196 lidp = xfs_trans_next_item(tp, lidp);
1197 continue; 1312 continue;
1198 }
1199 1313
1200 /* 1314 /*
1201 * The item may be marked dirty but not log anything. This can 1315 * The item may be marked dirty but not log anything. This can
@@ -1206,7 +1320,6 @@ xfs_trans_fill_vecs(
1206 IOP_FORMAT(lidp->lid_item, vecp); 1320 IOP_FORMAT(lidp->lid_item, vecp);
1207 vecp += lidp->lid_size; 1321 vecp += lidp->lid_size;
1208 IOP_PIN(lidp->lid_item); 1322 IOP_PIN(lidp->lid_item);
1209 lidp = xfs_trans_next_item(tp, lidp);
1210 } 1323 }
1211 1324
1212 /* 1325 /*
@@ -1284,7 +1397,7 @@ xfs_trans_item_committed(
1284 * log item flags, if anyone else stales the buffer we do not want to 1397 * log item flags, if anyone else stales the buffer we do not want to
1285 * pay any attention to it. 1398 * pay any attention to it.
1286 */ 1399 */
1287 IOP_UNPIN(lip); 1400 IOP_UNPIN(lip, 0);
1288} 1401}
1289 1402
1290/* 1403/*
@@ -1298,27 +1411,15 @@ xfs_trans_item_committed(
1298 */ 1411 */
1299STATIC void 1412STATIC void
1300xfs_trans_committed( 1413xfs_trans_committed(
1301 struct xfs_trans *tp, 1414 void *arg,
1302 int abortflag) 1415 int abortflag)
1303{ 1416{
1304 xfs_log_item_desc_t *lidp; 1417 struct xfs_trans *tp = arg;
1305 xfs_log_item_chunk_t *licp; 1418 struct xfs_log_item_desc *lidp, *next;
1306 xfs_log_item_chunk_t *next_licp;
1307
1308 /* Call the transaction's completion callback if there is one. */
1309 if (tp->t_callback != NULL)
1310 tp->t_callback(tp, tp->t_callarg);
1311 1419
1312 for (lidp = xfs_trans_first_item(tp); 1420 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1313 lidp != NULL;
1314 lidp = xfs_trans_next_item(tp, lidp)) {
1315 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); 1421 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1316 } 1422 xfs_trans_free_item_desc(lidp);
1317
1318 /* free the item chunks, ignoring the embedded chunk */
1319 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
1320 next_licp = licp->lic_next;
1321 kmem_free(licp);
1322 } 1423 }
1323 1424
1324 xfs_trans_free(tp); 1425 xfs_trans_free(tp);
@@ -1333,16 +1434,14 @@ xfs_trans_uncommit(
1333 struct xfs_trans *tp, 1434 struct xfs_trans *tp,
1334 uint flags) 1435 uint flags)
1335{ 1436{
1336 xfs_log_item_desc_t *lidp; 1437 struct xfs_log_item_desc *lidp;
1337 1438
1338 for (lidp = xfs_trans_first_item(tp); 1439 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1339 lidp != NULL;
1340 lidp = xfs_trans_next_item(tp, lidp)) {
1341 /* 1440 /*
1342 * Unpin all but those that aren't dirty. 1441 * Unpin all but those that aren't dirty.
1343 */ 1442 */
1344 if (lidp->lid_flags & XFS_LID_DIRTY) 1443 if (lidp->lid_flags & XFS_LID_DIRTY)
1345 IOP_UNPIN_REMOVE(lidp->lid_item, tp); 1444 IOP_UNPIN(lidp->lid_item, 1);
1346 } 1445 }
1347 1446
1348 xfs_trans_unreserve_and_mod_sb(tp); 1447 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1445,7 +1544,7 @@ xfs_trans_commit_iclog(
1445 * running in simulation mode (the log is explicitly turned 1544 * running in simulation mode (the log is explicitly turned
1446 * off). 1545 * off).
1447 */ 1546 */
1448 tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; 1547 tp->t_logcb.cb_func = xfs_trans_committed;
1449 tp->t_logcb.cb_arg = tp; 1548 tp->t_logcb.cb_arg = tp;
1450 1549
1451 /* 1550 /*
@@ -1508,33 +1607,28 @@ STATIC struct xfs_log_vec *
1508xfs_trans_alloc_log_vecs( 1607xfs_trans_alloc_log_vecs(
1509 xfs_trans_t *tp) 1608 xfs_trans_t *tp)
1510{ 1609{
1511 xfs_log_item_desc_t *lidp; 1610 struct xfs_log_item_desc *lidp;
1512 struct xfs_log_vec *lv = NULL; 1611 struct xfs_log_vec *lv = NULL;
1513 struct xfs_log_vec *ret_lv = NULL; 1612 struct xfs_log_vec *ret_lv = NULL;
1514 1613
1515 lidp = xfs_trans_first_item(tp);
1516 1614
1517 /* Bail out if we didn't find a log item. */ 1615 /* Bail out if we didn't find a log item. */
1518 if (!lidp) { 1616 if (list_empty(&tp->t_items)) {
1519 ASSERT(0); 1617 ASSERT(0);
1520 return NULL; 1618 return NULL;
1521 } 1619 }
1522 1620
1523 while (lidp != NULL) { 1621 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1524 struct xfs_log_vec *new_lv; 1622 struct xfs_log_vec *new_lv;
1525 1623
1526 /* Skip items which aren't dirty in this transaction. */ 1624 /* Skip items which aren't dirty in this transaction. */
1527 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1625 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1528 lidp = xfs_trans_next_item(tp, lidp);
1529 continue; 1626 continue;
1530 }
1531 1627
1532 /* Skip items that do not have any vectors for writing */ 1628 /* Skip items that do not have any vectors for writing */
1533 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1629 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1534 if (!lidp->lid_size) { 1630 if (!lidp->lid_size)
1535 lidp = xfs_trans_next_item(tp, lidp);
1536 continue; 1631 continue;
1537 }
1538 1632
1539 new_lv = kmem_zalloc(sizeof(*new_lv) + 1633 new_lv = kmem_zalloc(sizeof(*new_lv) +
1540 lidp->lid_size * sizeof(struct xfs_log_iovec), 1634 lidp->lid_size * sizeof(struct xfs_log_iovec),
@@ -1549,7 +1643,6 @@ xfs_trans_alloc_log_vecs(
1549 else 1643 else
1550 lv->lv_next = new_lv; 1644 lv->lv_next = new_lv;
1551 lv = new_lv; 1645 lv = new_lv;
1552 lidp = xfs_trans_next_item(tp, lidp);
1553 } 1646 }
1554 1647
1555 return ret_lv; 1648 return ret_lv;
@@ -1579,9 +1672,6 @@ xfs_trans_commit_cil(
1579 return error; 1672 return error;
1580 1673
1581 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1674 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1582
1583 /* xfs_trans_free_items() unlocks them first */
1584 xfs_trans_free_items(tp, *commit_lsn, 0);
1585 xfs_trans_free(tp); 1675 xfs_trans_free(tp);
1586 return 0; 1676 return 0;
1587} 1677}
@@ -1708,12 +1798,6 @@ xfs_trans_cancel(
1708 int flags) 1798 int flags)
1709{ 1799{
1710 int log_flags; 1800 int log_flags;
1711#ifdef DEBUG
1712 xfs_log_item_chunk_t *licp;
1713 xfs_log_item_desc_t *lidp;
1714 xfs_log_item_t *lip;
1715 int i;
1716#endif
1717 xfs_mount_t *mp = tp->t_mountp; 1801 xfs_mount_t *mp = tp->t_mountp;
1718 1802
1719 /* 1803 /*
@@ -1732,21 +1816,11 @@ xfs_trans_cancel(
1732 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1816 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1733 } 1817 }
1734#ifdef DEBUG 1818#ifdef DEBUG
1735 if (!(flags & XFS_TRANS_ABORT)) { 1819 if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
1736 licp = &(tp->t_items); 1820 struct xfs_log_item_desc *lidp;
1737 while (licp != NULL) { 1821
1738 lidp = licp->lic_descs; 1822 list_for_each_entry(lidp, &tp->t_items, lid_trans)
1739 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1823 ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD));
1740 if (xfs_lic_isfree(licp, i)) {
1741 continue;
1742 }
1743
1744 lip = lidp->lid_item;
1745 if (!XFS_FORCED_SHUTDOWN(mp))
1746 ASSERT(!(lip->li_type == XFS_LI_EFD));
1747 }
1748 licp = licp->lic_next;
1749 }
1750 } 1824 }
1751#endif 1825#endif
1752 xfs_trans_unreserve_and_mod_sb(tp); 1826 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1834,7 +1908,6 @@ xfs_trans_roll(
1834 if (error) 1908 if (error)
1835 return error; 1909 return error;
1836 1910
1837 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 1911 xfs_trans_ijoin(trans, dp);
1838 xfs_trans_ihold(trans, dp);
1839 return 0; 1912 return 0;
1840} 1913}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e639e8e9a2a9..246286b77a86 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -161,105 +161,14 @@ typedef struct xfs_trans_header {
161 * the amount of space needed to log the item it describes 161 * the amount of space needed to log the item it describes
162 * once we get to commit processing (see xfs_trans_commit()). 162 * once we get to commit processing (see xfs_trans_commit()).
163 */ 163 */
164typedef struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size; 166 ushort lid_size;
167 unsigned char lid_flags; 167 unsigned char lid_flags;
168 unsigned char lid_index; 168 struct list_head lid_trans;
169} xfs_log_item_desc_t; 169};
170 170
171#define XFS_LID_DIRTY 0x1 171#define XFS_LID_DIRTY 0x1
172#define XFS_LID_PINNED 0x2
173
174/*
175 * This structure is used to maintain a chunk list of log_item_desc
176 * structures. The free field is a bitmask indicating which descriptors
177 * in this chunk's array are free. The unused field is the first value
178 * not used since this chunk was allocated.
179 */
180#define XFS_LIC_NUM_SLOTS 15
181typedef struct xfs_log_item_chunk {
182 struct xfs_log_item_chunk *lic_next;
183 ushort lic_free;
184 ushort lic_unused;
185 xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS];
186} xfs_log_item_chunk_t;
187
188#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1)
189#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1)
190
191
192/*
193 * Initialize the given chunk. Set the chunk's free descriptor mask
194 * to indicate that all descriptors are free. The caller gets to set
195 * lic_unused to the right value (0 matches all free). The
196 * lic_descs.lid_index values are set up as each desc is allocated.
197 */
198static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
199{
200 cp->lic_free = XFS_LIC_FREEMASK;
201}
202
203static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
204{
205 cp->lic_descs[slot].lid_index = (unsigned char)(slot);
206}
207
208static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
209{
210 return cp->lic_free & XFS_LIC_FREEMASK;
211}
212
213static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
214{
215 cp->lic_free = XFS_LIC_FREEMASK;
216}
217
218static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
219{
220 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
221}
222
223static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
224{
225 return (cp->lic_free & (1 << slot));
226}
227
228static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
229{
230 cp->lic_free &= ~(1 << slot);
231}
232
233static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
234{
235 cp->lic_free |= 1 << slot;
236}
237
238static inline xfs_log_item_desc_t *
239xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
240{
241 return &(cp->lic_descs[slot]);
242}
243
244static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
245{
246 return (uint)dp->lid_index;
247}
248
249/*
250 * Calculate the address of a chunk given a descriptor pointer:
251 * dp - dp->lid_index give the address of the start of the lic_descs array.
252 * From this we subtract the offset of the lic_descs field in a chunk.
253 * All of this yields the address of the chunk, which is
254 * cast to a chunk pointer.
255 */
256static inline xfs_log_item_chunk_t *
257xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
258{
259 return (xfs_log_item_chunk_t*) \
260 (((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
261 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
262}
263 172
264#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 173#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
265/* 174/*
@@ -275,8 +184,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
275/* 184/*
276 * Values for call flags parameter. 185 * Values for call flags parameter.
277 */ 186 */
278#define XFS_TRANS_NOSLEEP 0x1
279#define XFS_TRANS_WAIT 0x2
280#define XFS_TRANS_RELEASE_LOG_RES 0x4 187#define XFS_TRANS_RELEASE_LOG_RES 0x4
281#define XFS_TRANS_ABORT 0x8 188#define XFS_TRANS_ABORT 0x8
282 189
@@ -438,8 +345,7 @@ typedef struct xfs_item_ops {
438 uint (*iop_size)(xfs_log_item_t *); 345 uint (*iop_size)(xfs_log_item_t *);
439 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 346 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
440 void (*iop_pin)(xfs_log_item_t *); 347 void (*iop_pin)(xfs_log_item_t *);
441 void (*iop_unpin)(xfs_log_item_t *); 348 void (*iop_unpin)(xfs_log_item_t *, int remove);
442 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
443 uint (*iop_trylock)(xfs_log_item_t *); 349 uint (*iop_trylock)(xfs_log_item_t *);
444 void (*iop_unlock)(xfs_log_item_t *); 350 void (*iop_unlock)(xfs_log_item_t *);
445 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
@@ -451,8 +357,7 @@ typedef struct xfs_item_ops {
451#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 357#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
452#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 358#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
453#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) 359#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
454#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) 360#define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove)
455#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
456#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) 361#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
457#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 362#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
458#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) 363#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
@@ -494,8 +399,6 @@ typedef struct xfs_trans {
494 * transaction. */ 399 * transaction. */
495 struct xfs_mount *t_mountp; /* ptr to fs mount struct */ 400 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
496 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ 401 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
497 xfs_trans_callback_t t_callback; /* transaction callback */
498 void *t_callarg; /* callback arg */
499 unsigned int t_flags; /* misc flags */ 402 unsigned int t_flags; /* misc flags */
500 int64_t t_icount_delta; /* superblock icount change */ 403 int64_t t_icount_delta; /* superblock icount change */
501 int64_t t_ifree_delta; /* superblock ifree change */ 404 int64_t t_ifree_delta; /* superblock ifree change */
@@ -516,8 +419,7 @@ typedef struct xfs_trans {
516 int64_t t_rblocks_delta;/* superblock rblocks change */ 419 int64_t t_rblocks_delta;/* superblock rblocks change */
517 int64_t t_rextents_delta;/* superblocks rextents chg */ 420 int64_t t_rextents_delta;/* superblocks rextents chg */
518 int64_t t_rextslog_delta;/* superblocks rextslog chg */ 421 int64_t t_rextslog_delta;/* superblocks rextslog chg */
519 unsigned int t_items_free; /* log item descs free */ 422 struct list_head t_items; /* log item descriptors */
520 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
521 xfs_trans_header_t t_header; /* header for in-log trans */ 423 xfs_trans_header_t t_header; /* header for in-log trans */
522 struct list_head t_busy; /* list of busy extents */ 424 struct list_head t_busy; /* list of busy extents */
523 unsigned long t_pflags; /* saved process flags state */ 425 unsigned long t_pflags; /* saved process flags state */
@@ -569,8 +471,9 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
569void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
570int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, 472int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
571 xfs_ino_t , uint, uint, struct xfs_inode **); 473 xfs_ino_t , uint, uint, struct xfs_inode **);
572void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); 474void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
573void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); 475void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
476void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
574void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 477void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
575void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 478void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
576struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 479struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -595,6 +498,7 @@ int xfs_trans_ail_init(struct xfs_mount *);
595void xfs_trans_ail_destroy(struct xfs_mount *); 498void xfs_trans_ail_destroy(struct xfs_mount *);
596 499
597extern kmem_zone_t *xfs_trans_zone; 500extern kmem_zone_t *xfs_trans_zone;
501extern kmem_zone_t *xfs_log_item_desc_zone;
598 502
599#endif /* __KERNEL__ */ 503#endif /* __KERNEL__ */
600 504
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index e799824f7245..dc9069568ff7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -24,7 +24,6 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 63d81a22f4fd..c47918c302a5 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -24,14 +24,10 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_buf_item.h" 33#include "xfs_buf_item.h"
@@ -51,36 +47,17 @@ xfs_trans_buf_item_match(
51 xfs_daddr_t blkno, 47 xfs_daddr_t blkno,
52 int len) 48 int len)
53{ 49{
54 xfs_log_item_chunk_t *licp; 50 struct xfs_log_item_desc *lidp;
55 xfs_log_item_desc_t *lidp; 51 struct xfs_buf_log_item *blip;
56 xfs_buf_log_item_t *blip;
57 int i;
58 52
59 len = BBTOB(len); 53 len = BBTOB(len);
60 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 54 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
61 if (xfs_lic_are_all_free(licp)) { 55 blip = (struct xfs_buf_log_item *)lidp->lid_item;
62 ASSERT(licp == &tp->t_items); 56 if (blip->bli_item.li_type == XFS_LI_BUF &&
63 ASSERT(licp->lic_next == NULL); 57 XFS_BUF_TARGET(blip->bli_buf) == target &&
64 return NULL; 58 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
65 } 59 XFS_BUF_COUNT(blip->bli_buf) == len)
66 60 return blip->bli_buf;
67 for (i = 0; i < licp->lic_unused; i++) {
68 /*
69 * Skip unoccupied slots.
70 */
71 if (xfs_lic_isfree(licp, i))
72 continue;
73
74 lidp = xfs_lic_slot(licp, i);
75 blip = (xfs_buf_log_item_t *)lidp->lid_item;
76 if (blip->bli_item.li_type != XFS_LI_BUF)
77 continue;
78
79 if (XFS_BUF_TARGET(blip->bli_buf) == target &&
80 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
81 XFS_BUF_COUNT(blip->bli_buf) == len)
82 return blip->bli_buf;
83 }
84 } 61 }
85 62
86 return NULL; 63 return NULL;
@@ -127,7 +104,7 @@ _xfs_trans_bjoin(
127 /* 104 /*
128 * Get a log_item_desc to point at the new item. 105 * Get a log_item_desc to point at the new item.
129 */ 106 */
130 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 107 xfs_trans_add_item(tp, &bip->bli_item);
131 108
132 /* 109 /*
133 * Initialize b_fsprivate2 so we can find it with incore_match() 110 * Initialize b_fsprivate2 so we can find it with incore_match()
@@ -359,7 +336,7 @@ xfs_trans_read_buf(
359 ASSERT(!XFS_BUF_ISASYNC(bp)); 336 ASSERT(!XFS_BUF_ISASYNC(bp));
360 XFS_BUF_READ(bp); 337 XFS_BUF_READ(bp);
361 xfsbdstrat(tp->t_mountp, bp); 338 xfsbdstrat(tp->t_mountp, bp);
362 error = xfs_iowait(bp); 339 error = xfs_buf_iowait(bp);
363 if (error) { 340 if (error) {
364 xfs_ioerror_alert("xfs_trans_read_buf", mp, 341 xfs_ioerror_alert("xfs_trans_read_buf", mp,
365 bp, blkno); 342 bp, blkno);
@@ -483,7 +460,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
483{ 460{
484 xfs_buf_log_item_t *bip; 461 xfs_buf_log_item_t *bip;
485 xfs_log_item_t *lip; 462 xfs_log_item_t *lip;
486 xfs_log_item_desc_t *lidp;
487 463
488 /* 464 /*
489 * Default to a normal brelse() call if the tp is NULL. 465 * Default to a normal brelse() call if the tp is NULL.
@@ -514,13 +490,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
514 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 490 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
515 ASSERT(atomic_read(&bip->bli_refcount) > 0); 491 ASSERT(atomic_read(&bip->bli_refcount) > 0);
516 492
517 /*
518 * Find the item descriptor pointing to this buffer's
519 * log item. It must be there.
520 */
521 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
522 ASSERT(lidp != NULL);
523
524 trace_xfs_trans_brelse(bip); 493 trace_xfs_trans_brelse(bip);
525 494
526 /* 495 /*
@@ -536,7 +505,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
536 * If the buffer is dirty within this transaction, we can't 505 * If the buffer is dirty within this transaction, we can't
537 * release it until we commit. 506 * release it until we commit.
538 */ 507 */
539 if (lidp->lid_flags & XFS_LID_DIRTY) 508 if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
540 return; 509 return;
541 510
542 /* 511 /*
@@ -553,7 +522,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
553 /* 522 /*
554 * Free up the log item descriptor tracking the released item. 523 * Free up the log item descriptor tracking the released item.
555 */ 524 */
556 xfs_trans_free_item(tp, lidp); 525 xfs_trans_del_item(&bip->bli_item);
557 526
558 /* 527 /*
559 * Clear the hold flag in the buf log item if it is set. 528 * Clear the hold flag in the buf log item if it is set.
@@ -665,7 +634,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
665 uint last) 634 uint last)
666{ 635{
667 xfs_buf_log_item_t *bip; 636 xfs_buf_log_item_t *bip;
668 xfs_log_item_desc_t *lidp;
669 637
670 ASSERT(XFS_BUF_ISBUSY(bp)); 638 ASSERT(XFS_BUF_ISBUSY(bp));
671 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 639 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -690,7 +658,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
690 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 658 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
691 ASSERT(atomic_read(&bip->bli_refcount) > 0); 659 ASSERT(atomic_read(&bip->bli_refcount) > 0);
692 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 660 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
693 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 661 bip->bli_item.li_cb = xfs_buf_iodone;
694 662
695 trace_xfs_trans_log_buf(bip); 663 trace_xfs_trans_log_buf(bip);
696 664
@@ -707,11 +675,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
707 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 675 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
708 } 676 }
709 677
710 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
711 ASSERT(lidp != NULL);
712
713 tp->t_flags |= XFS_TRANS_DIRTY; 678 tp->t_flags |= XFS_TRANS_DIRTY;
714 lidp->lid_flags |= XFS_LID_DIRTY; 679 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
715 bip->bli_flags |= XFS_BLI_LOGGED; 680 bip->bli_flags |= XFS_BLI_LOGGED;
716 xfs_buf_item_log(bip, first, last); 681 xfs_buf_item_log(bip, first, last);
717} 682}
@@ -740,7 +705,6 @@ xfs_trans_binval(
740 xfs_trans_t *tp, 705 xfs_trans_t *tp,
741 xfs_buf_t *bp) 706 xfs_buf_t *bp)
742{ 707{
743 xfs_log_item_desc_t *lidp;
744 xfs_buf_log_item_t *bip; 708 xfs_buf_log_item_t *bip;
745 709
746 ASSERT(XFS_BUF_ISBUSY(bp)); 710 ASSERT(XFS_BUF_ISBUSY(bp));
@@ -748,8 +712,6 @@ xfs_trans_binval(
748 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 712 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
749 713
750 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 714 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
751 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
752 ASSERT(lidp != NULL);
753 ASSERT(atomic_read(&bip->bli_refcount) > 0); 715 ASSERT(atomic_read(&bip->bli_refcount) > 0);
754 716
755 trace_xfs_trans_binval(bip); 717 trace_xfs_trans_binval(bip);
@@ -764,7 +726,7 @@ xfs_trans_binval(
764 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 726 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
765 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 727 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
766 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 728 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
767 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 729 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
768 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 730 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
769 return; 731 return;
770 } 732 }
@@ -797,7 +759,7 @@ xfs_trans_binval(
797 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 759 bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
798 memset((char *)(bip->bli_format.blf_data_map), 0, 760 memset((char *)(bip->bli_format.blf_data_map), 0,
799 (bip->bli_format.blf_map_size * sizeof(uint))); 761 (bip->bli_format.blf_map_size * sizeof(uint)));
800 lidp->lid_flags |= XFS_LID_DIRTY; 762 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
801 tp->t_flags |= XFS_TRANS_DIRTY; 763 tp->t_flags |= XFS_TRANS_DIRTY;
802} 764}
803 765
@@ -853,12 +815,9 @@ xfs_trans_stale_inode_buf(
853 ASSERT(atomic_read(&bip->bli_refcount) > 0); 815 ASSERT(atomic_read(&bip->bli_refcount) > 0);
854 816
855 bip->bli_flags |= XFS_BLI_STALE_INODE; 817 bip->bli_flags |= XFS_BLI_STALE_INODE;
856 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) 818 bip->bli_item.li_cb = xfs_buf_iodone;
857 xfs_buf_iodone;
858} 819}
859 820
860
861
862/* 821/*
863 * Mark the buffer as being one which contains newly allocated 822 * Mark the buffer as being one which contains newly allocated
864 * inodes. We need to make sure that even if this buffer is 823 * inodes. We need to make sure that even if this buffer is
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 27cce2a9c7e9..f783d5e9fa70 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -23,7 +23,6 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dmapi.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
29#include "xfs_extfree_item.h" 28#include "xfs_extfree_item.h"
@@ -49,9 +48,8 @@ xfs_trans_get_efi(xfs_trans_t *tp,
49 /* 48 /*
50 * Get a log_item_desc to point at the new item. 49 * Get a log_item_desc to point at the new item.
51 */ 50 */
52 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efip); 51 xfs_trans_add_item(tp, &efip->efi_item);
53 52 return efip;
54 return (efip);
55} 53}
56 54
57/* 55/*
@@ -65,15 +63,11 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
65 xfs_fsblock_t start_block, 63 xfs_fsblock_t start_block,
66 xfs_extlen_t ext_len) 64 xfs_extlen_t ext_len)
67{ 65{
68 xfs_log_item_desc_t *lidp;
69 uint next_extent; 66 uint next_extent;
70 xfs_extent_t *extp; 67 xfs_extent_t *extp;
71 68
72 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efip);
73 ASSERT(lidp != NULL);
74
75 tp->t_flags |= XFS_TRANS_DIRTY; 69 tp->t_flags |= XFS_TRANS_DIRTY;
76 lidp->lid_flags |= XFS_LID_DIRTY; 70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
77 71
78 next_extent = efip->efi_next_extent; 72 next_extent = efip->efi_next_extent;
79 ASSERT(next_extent < efip->efi_format.efi_nextents); 73 ASSERT(next_extent < efip->efi_format.efi_nextents);
@@ -106,9 +100,8 @@ xfs_trans_get_efd(xfs_trans_t *tp,
106 /* 100 /*
107 * Get a log_item_desc to point at the new item. 101 * Get a log_item_desc to point at the new item.
108 */ 102 */
109 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)efdp); 103 xfs_trans_add_item(tp, &efdp->efd_item);
110 104 return efdp;
111 return (efdp);
112} 105}
113 106
114/* 107/*
@@ -122,15 +115,11 @@ xfs_trans_log_efd_extent(xfs_trans_t *tp,
122 xfs_fsblock_t start_block, 115 xfs_fsblock_t start_block,
123 xfs_extlen_t ext_len) 116 xfs_extlen_t ext_len)
124{ 117{
125 xfs_log_item_desc_t *lidp;
126 uint next_extent; 118 uint next_extent;
127 xfs_extent_t *extp; 119 xfs_extent_t *extp;
128 120
129 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)efdp);
130 ASSERT(lidp != NULL);
131
132 tp->t_flags |= XFS_TRANS_DIRTY; 121 tp->t_flags |= XFS_TRANS_DIRTY;
133 lidp->lid_flags |= XFS_LID_DIRTY; 122 efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
134 123
135 next_extent = efdp->efd_next_extent; 124 next_extent = efdp->efd_next_extent;
136 ASSERT(next_extent < efdp->efd_format.efd_nextents); 125 ASSERT(next_extent < efdp->efd_format.efd_nextents);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 2559dfec946b..ccb34532768b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -24,20 +24,16 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 27#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 29#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h" 30#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 31#include "xfs_dinode.h"
36#include "xfs_inode.h" 32#include "xfs_inode.h"
37#include "xfs_btree.h" 33#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_trans_priv.h" 34#include "xfs_trans_priv.h"
40#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
36#include "xfs_trace.h"
41 37
42#ifdef XFS_TRANS_DEBUG 38#ifdef XFS_TRANS_DEBUG
43STATIC void 39STATIC void
@@ -47,7 +43,6 @@ xfs_trans_inode_broot_debug(
47#define xfs_trans_inode_broot_debug(ip) 43#define xfs_trans_inode_broot_debug(ip)
48#endif 44#endif
49 45
50
51/* 46/*
52 * Get an inode and join it to the transaction. 47 * Get an inode and join it to the transaction.
53 */ 48 */
@@ -63,76 +58,94 @@ xfs_trans_iget(
63 int error; 58 int error;
64 59
65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); 60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
66 if (!error && tp) 61 if (!error && tp) {
67 xfs_trans_ijoin(tp, *ipp, lock_flags); 62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
68 return error; 65 return error;
69} 66}
70 67
71/* 68/*
72 * Add the locked inode to the transaction. 69 * Add a locked inode to the transaction.
73 * The inode must be locked, and it cannot be associated with any 70 *
74 * transaction. The caller must specify the locks already held 71 * The inode must be locked, and it cannot be associated with any transaction.
75 * on the inode.
76 */ 72 */
77void 73void
78xfs_trans_ijoin( 74xfs_trans_ijoin(
79 xfs_trans_t *tp, 75 struct xfs_trans *tp,
80 xfs_inode_t *ip, 76 struct xfs_inode *ip)
81 uint lock_flags)
82{ 77{
83 xfs_inode_log_item_t *iip; 78 xfs_inode_log_item_t *iip;
84 79
85 ASSERT(ip->i_transp == NULL); 80 ASSERT(ip->i_transp == NULL);
86 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 81 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
87 ASSERT(lock_flags & XFS_ILOCK_EXCL);
88 if (ip->i_itemp == NULL) 82 if (ip->i_itemp == NULL)
89 xfs_inode_item_init(ip, ip->i_mount); 83 xfs_inode_item_init(ip, ip->i_mount);
90 iip = ip->i_itemp; 84 iip = ip->i_itemp;
91 ASSERT(iip->ili_flags == 0); 85 ASSERT(iip->ili_lock_flags == 0);
92 86
93 /* 87 /*
94 * Get a log_item_desc to point at the new item. 88 * Get a log_item_desc to point at the new item.
95 */ 89 */
96 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(iip)); 90 xfs_trans_add_item(tp, &iip->ili_item);
97 91
98 xfs_trans_inode_broot_debug(ip); 92 xfs_trans_inode_broot_debug(ip);
99 93
100 /* 94 /*
101 * If the IO lock is already held, mark that in the inode log item.
102 */
103 if (lock_flags & XFS_IOLOCK_EXCL) {
104 iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
105 } else if (lock_flags & XFS_IOLOCK_SHARED) {
106 iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
107 }
108
109 /*
110 * Initialize i_transp so we can find it with xfs_inode_incore() 95 * Initialize i_transp so we can find it with xfs_inode_incore()
111 * in xfs_trans_iget() above. 96 * in xfs_trans_iget() above.
112 */ 97 */
113 ip->i_transp = tp; 98 ip->i_transp = tp;
114} 99}
115 100
116 101/*
102 * Add a locked inode to the transaction.
103 *
104 *
105 * Grabs a reference to the inode which will be dropped when the transaction
106 * is commited. The inode will also be unlocked at that point. The inode
107 * must be locked, and it cannot be associated with any transaction.
108 */
109void
110xfs_trans_ijoin_ref(
111 struct xfs_trans *tp,
112 struct xfs_inode *ip,
113 uint lock_flags)
114{
115 xfs_trans_ijoin(tp, ip);
116 IHOLD(ip);
117 ip->i_itemp->ili_lock_flags = lock_flags;
118}
117 119
118/* 120/*
119 * Mark the inode as not needing to be unlocked when the inode item's 121 * Transactional inode timestamp update. Requires the inode to be locked and
120 * IOP_UNLOCK() routine is called. The inode must already be locked 122 * joined to the transaction supplied. Relies on the transaction subsystem to
121 * and associated with the given transaction. 123 * track dirty state and update/writeback the inode accordingly.
122 */ 124 */
123/*ARGSUSED*/
124void 125void
125xfs_trans_ihold( 126xfs_trans_ichgtime(
126 xfs_trans_t *tp, 127 struct xfs_trans *tp,
127 xfs_inode_t *ip) 128 struct xfs_inode *ip,
129 int flags)
128{ 130{
129 ASSERT(ip->i_transp == tp); 131 struct inode *inode = VFS_I(ip);
130 ASSERT(ip->i_itemp != NULL); 132 timespec_t tv;
133
134 ASSERT(tp);
131 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 135 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
136 ASSERT(ip->i_transp == tp);
132 137
133 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 138 tv = current_fs_time(inode->i_sb);
134}
135 139
140 if ((flags & XFS_ICHGTIME_MOD) &&
141 !timespec_equal(&inode->i_mtime, &tv)) {
142 inode->i_mtime = tv;
143 }
144 if ((flags & XFS_ICHGTIME_CHG) &&
145 !timespec_equal(&inode->i_ctime, &tv)) {
146 inode->i_ctime = tv;
147 }
148}
136 149
137/* 150/*
138 * This is called to mark the fields indicated in fieldmask as needing 151 * This is called to mark the fields indicated in fieldmask as needing
@@ -149,17 +162,12 @@ xfs_trans_log_inode(
149 xfs_inode_t *ip, 162 xfs_inode_t *ip,
150 uint flags) 163 uint flags)
151{ 164{
152 xfs_log_item_desc_t *lidp;
153
154 ASSERT(ip->i_transp == tp); 165 ASSERT(ip->i_transp == tp);
155 ASSERT(ip->i_itemp != NULL); 166 ASSERT(ip->i_itemp != NULL);
156 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 167 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
157 168
158 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
159 ASSERT(lidp != NULL);
160
161 tp->t_flags |= XFS_TRANS_DIRTY; 169 tp->t_flags |= XFS_TRANS_DIRTY;
162 lidp->lid_flags |= XFS_LID_DIRTY; 170 ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY;
163 171
164 /* 172 /*
165 * Always OR in the bits from the ili_last_fields field. 173 * Always OR in the bits from the ili_last_fields field.
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
deleted file mode 100644
index f11d37d06dcc..000000000000
--- a/fs/xfs/xfs_trans_item.c
+++ /dev/null
@@ -1,441 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
33
34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
35 int, int, xfs_lsn_t);
36
37/*
38 * This is called to add the given log item to the transaction's
39 * list of log items. It must find a free log item descriptor
40 * or allocate a new one and add the item to that descriptor.
41 * The function returns a pointer to item descriptor used to point
42 * to the new item. The log item will now point to its new descriptor
43 * with its li_desc field.
44 */
45xfs_log_item_desc_t *
46xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
47{
48 xfs_log_item_desc_t *lidp;
49 xfs_log_item_chunk_t *licp;
50 int i=0;
51
52 /*
53 * If there are no free descriptors, allocate a new chunk
54 * of them and put it at the front of the chunk list.
55 */
56 if (tp->t_items_free == 0) {
57 licp = (xfs_log_item_chunk_t*)
58 kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
59 ASSERT(licp != NULL);
60 /*
61 * Initialize the chunk, and then
62 * claim the first slot in the newly allocated chunk.
63 */
64 xfs_lic_init(licp);
65 xfs_lic_claim(licp, 0);
66 licp->lic_unused = 1;
67 xfs_lic_init_slot(licp, 0);
68 lidp = xfs_lic_slot(licp, 0);
69
70 /*
71 * Link in the new chunk and update the free count.
72 */
73 licp->lic_next = tp->t_items.lic_next;
74 tp->t_items.lic_next = licp;
75 tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;
76
77 /*
78 * Initialize the descriptor and the generic portion
79 * of the log item.
80 *
81 * Point the new slot at this item and return it.
82 * Also point the log item at its currently active
83 * descriptor and set the item's mount pointer.
84 */
85 lidp->lid_item = lip;
86 lidp->lid_flags = 0;
87 lidp->lid_size = 0;
88 lip->li_desc = lidp;
89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
91 return lidp;
92 }
93
94 /*
95 * Find the free descriptor. It is somewhere in the chunklist
96 * of descriptors.
97 */
98 licp = &tp->t_items;
99 while (licp != NULL) {
100 if (xfs_lic_vacancy(licp)) {
101 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
102 i = licp->lic_unused;
103 ASSERT(xfs_lic_isfree(licp, i));
104 break;
105 }
106 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
107 if (xfs_lic_isfree(licp, i))
108 break;
109 }
110 ASSERT(i <= XFS_LIC_MAX_SLOT);
111 break;
112 }
113 licp = licp->lic_next;
114 }
115 ASSERT(licp != NULL);
116 /*
117 * If we find a free descriptor, claim it,
118 * initialize it, and return it.
119 */
120 xfs_lic_claim(licp, i);
121 if (licp->lic_unused <= i) {
122 licp->lic_unused = i + 1;
123 xfs_lic_init_slot(licp, i);
124 }
125 lidp = xfs_lic_slot(licp, i);
126 tp->t_items_free--;
127 lidp->lid_item = lip;
128 lidp->lid_flags = 0;
129 lidp->lid_size = 0;
130 lip->li_desc = lidp;
131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
133 return lidp;
134}
135
136/*
137 * Free the given descriptor.
138 *
139 * This requires setting the bit in the chunk's free mask corresponding
140 * to the given slot.
141 */
142void
143xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
144{
145 uint slot;
146 xfs_log_item_chunk_t *licp;
147 xfs_log_item_chunk_t **licpp;
148
149 slot = xfs_lic_desc_to_slot(lidp);
150 licp = xfs_lic_desc_to_chunk(lidp);
151 xfs_lic_relse(licp, slot);
152 lidp->lid_item->li_desc = NULL;
153 tp->t_items_free++;
154
155 /*
156 * If there are no more used items in the chunk and this is not
157 * the chunk embedded in the transaction structure, then free
158 * the chunk. First pull it from the chunk list and then
159 * free it back to the heap. We didn't bother with a doubly
160 * linked list here because the lists should be very short
161 * and this is not a performance path. It's better to save
162 * the memory of the extra pointer.
163 *
164 * Also decrement the transaction structure's count of free items
165 * by the number in a chunk since we are freeing an empty chunk.
166 */
167 if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
168 licpp = &(tp->t_items.lic_next);
169 while (*licpp != licp) {
170 ASSERT(*licpp != NULL);
171 licpp = &((*licpp)->lic_next);
172 }
173 *licpp = licp->lic_next;
174 kmem_free(licp);
175 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
176 }
177}
178
179/*
180 * This is called to find the descriptor corresponding to the given
181 * log item. It returns a pointer to the descriptor.
182 * The log item MUST have a corresponding descriptor in the given
183 * transaction. This routine does not return NULL, it panics.
184 *
185 * The descriptor pointer is kept in the log item's li_desc field.
186 * Just return it.
187 */
188/*ARGSUSED*/
189xfs_log_item_desc_t *
190xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip)
191{
192 ASSERT(lip->li_desc != NULL);
193
194 return lip->li_desc;
195}
196
197
198/*
199 * Return a pointer to the first descriptor in the chunk list.
200 * This does not return NULL if there are none, it panics.
201 *
202 * The first descriptor must be in either the first or second chunk.
203 * This is because the only chunk allowed to be empty is the first.
204 * All others are freed when they become empty.
205 *
206 * At some point this and xfs_trans_next_item() should be optimized
207 * to quickly look at the mask to determine if there is anything to
208 * look at.
209 */
210xfs_log_item_desc_t *
211xfs_trans_first_item(xfs_trans_t *tp)
212{
213 xfs_log_item_chunk_t *licp;
214 int i;
215
216 licp = &tp->t_items;
217 /*
218 * If it's not in the first chunk, skip to the second.
219 */
220 if (xfs_lic_are_all_free(licp)) {
221 licp = licp->lic_next;
222 }
223
224 /*
225 * Return the first non-free descriptor in the chunk.
226 */
227 ASSERT(!xfs_lic_are_all_free(licp));
228 for (i = 0; i < licp->lic_unused; i++) {
229 if (xfs_lic_isfree(licp, i)) {
230 continue;
231 }
232
233 return xfs_lic_slot(licp, i);
234 }
235 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
236 return NULL;
237}
238
239
240/*
241 * Given a descriptor, return the next descriptor in the chunk list.
242 * This returns NULL if there are no more used descriptors in the list.
243 *
244 * We do this by first locating the chunk in which the descriptor resides,
245 * and then scanning forward in the chunk and the list for the next
246 * used descriptor.
247 */
248/*ARGSUSED*/
249xfs_log_item_desc_t *
250xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
251{
252 xfs_log_item_chunk_t *licp;
253 int i;
254
255 licp = xfs_lic_desc_to_chunk(lidp);
256
257 /*
258 * First search the rest of the chunk. The for loop keeps us
259 * from referencing things beyond the end of the chunk.
260 */
261 for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
262 if (xfs_lic_isfree(licp, i)) {
263 continue;
264 }
265
266 return xfs_lic_slot(licp, i);
267 }
268
269 /*
270 * Now search the next chunk. It must be there, because the
271 * next chunk would have been freed if it were empty.
272 * If there is no next chunk, return NULL.
273 */
274 if (licp->lic_next == NULL) {
275 return NULL;
276 }
277
278 licp = licp->lic_next;
279 ASSERT(!xfs_lic_are_all_free(licp));
280 for (i = 0; i < licp->lic_unused; i++) {
281 if (xfs_lic_isfree(licp, i)) {
282 continue;
283 }
284
285 return xfs_lic_slot(licp, i);
286 }
287 ASSERT(0);
288 /* NOTREACHED */
289 return NULL; /* keep gcc quite */
290}
291
292/*
293 * This is called to unlock all of the items of a transaction and to free
294 * all the descriptors of that transaction.
295 *
296 * It walks the list of descriptors and unlocks each item. It frees
297 * each chunk except that embedded in the transaction as it goes along.
298 */
299void
300xfs_trans_free_items(
301 xfs_trans_t *tp,
302 xfs_lsn_t commit_lsn,
303 int flags)
304{
305 xfs_log_item_chunk_t *licp;
306 xfs_log_item_chunk_t *next_licp;
307 int abort;
308
309 abort = flags & XFS_TRANS_ABORT;
310 licp = &tp->t_items;
311 /*
312 * Special case the embedded chunk so we don't free it below.
313 */
314 if (!xfs_lic_are_all_free(licp)) {
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
316 xfs_lic_all_free(licp);
317 licp->lic_unused = 0;
318 }
319 licp = licp->lic_next;
320
321 /*
322 * Unlock each item in each chunk and free the chunks.
323 */
324 while (licp != NULL) {
325 ASSERT(!xfs_lic_are_all_free(licp));
326 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
327 next_licp = licp->lic_next;
328 kmem_free(licp);
329 licp = next_licp;
330 }
331
332 /*
333 * Reset the transaction structure's free item count.
334 */
335 tp->t_items_free = XFS_LIC_NUM_SLOTS;
336 tp->t_items.lic_next = NULL;
337}
338
339
340
341/*
342 * This is called to unlock the items associated with a transaction.
343 * Items which were not logged should be freed.
344 * Those which were logged must still be tracked so they can be unpinned
345 * when the transaction commits.
346 */
347void
348xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
349{
350 xfs_log_item_chunk_t *licp;
351 xfs_log_item_chunk_t *next_licp;
352 xfs_log_item_chunk_t **licpp;
353 int freed;
354
355 freed = 0;
356 licp = &tp->t_items;
357
358 /*
359 * Special case the embedded chunk so we don't free.
360 */
361 if (!xfs_lic_are_all_free(licp)) {
362 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
363 }
364 licpp = &(tp->t_items.lic_next);
365 licp = licp->lic_next;
366
367 /*
368 * Unlock each item in each chunk, free non-dirty descriptors,
369 * and free empty chunks.
370 */
371 while (licp != NULL) {
372 ASSERT(!xfs_lic_are_all_free(licp));
373 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
374 next_licp = licp->lic_next;
375 if (xfs_lic_are_all_free(licp)) {
376 *licpp = next_licp;
377 kmem_free(licp);
378 freed -= XFS_LIC_NUM_SLOTS;
379 } else {
380 licpp = &(licp->lic_next);
381 }
382 ASSERT(*licpp == next_licp);
383 licp = next_licp;
384 }
385
386 /*
387 * Fix the free descriptor count in the transaction.
388 */
389 tp->t_items_free += freed;
390}
391
392/*
393 * Unlock each item pointed to by a descriptor in the given chunk.
394 * Stamp the commit lsn into each item if necessary.
395 * Free descriptors pointing to items which are not dirty if freeing_chunk
396 * is zero. If freeing_chunk is non-zero, then we need to unlock all
397 * items in the chunk.
398 *
399 * Return the number of descriptors freed.
400 */
401STATIC int
402xfs_trans_unlock_chunk(
403 xfs_log_item_chunk_t *licp,
404 int freeing_chunk,
405 int abort,
406 xfs_lsn_t commit_lsn)
407{
408 xfs_log_item_desc_t *lidp;
409 xfs_log_item_t *lip;
410 int i;
411 int freed;
412
413 freed = 0;
414 lidp = licp->lic_descs;
415 for (i = 0; i < licp->lic_unused; i++, lidp++) {
416 if (xfs_lic_isfree(licp, i)) {
417 continue;
418 }
419 lip = lidp->lid_item;
420 lip->li_desc = NULL;
421
422 if (commit_lsn != NULLCOMMITLSN)
423 IOP_COMMITTING(lip, commit_lsn);
424 if (abort)
425 lip->li_flags |= XFS_LI_ABORTED;
426 IOP_UNLOCK(lip);
427
428 /*
429 * Free the descriptor if the item is not dirty
430 * within this transaction and the caller is not
431 * going to just free the entire thing regardless.
432 */
433 if (!(freeing_chunk) &&
434 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
435 xfs_lic_relse(licp, i);
436 freed++;
437 }
438 }
439
440 return freed;
441}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index c6e4f2c8de6e..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -23,23 +23,10 @@ struct xfs_log_item_desc;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25 25
26/* 26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27 * From xfs_trans_item.c 27void xfs_trans_del_item(struct xfs_log_item *);
28 */
29struct xfs_log_item_desc *xfs_trans_add_item(struct xfs_trans *,
30 struct xfs_log_item *);
31void xfs_trans_free_item(struct xfs_trans *,
32 struct xfs_log_item_desc *);
33struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
34 struct xfs_log_item *);
35struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *);
36struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *,
37 struct xfs_log_item_desc *);
38
39void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn);
40void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 28void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
41 int flags); 29 int flags);
42
43void xfs_trans_item_committed(struct xfs_log_item *lip, 30void xfs_trans_item_committed(struct xfs_log_item *lip,
44 xfs_lsn_t commit_lsn, int aborted); 31 xfs_lsn_t commit_lsn, int aborted);
45void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 320775295e32..26d1867d8156 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */
77
78typedef __uint32_t xlog_tid_t; /* transaction ID type */ 76typedef __uint32_t xlog_tid_t; /* transaction ID type */
79 77
80/* 78/*
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 4d88616bde91..8b32d1a4c5a1 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -25,18 +25,14 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_attr_sf.h"
33#include "xfs_dinode.h" 30#include "xfs_dinode.h"
34#include "xfs_inode.h" 31#include "xfs_inode.h"
35#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
36#include "xfs_bmap.h" 33#include "xfs_bmap.h"
37#include "xfs_error.h" 34#include "xfs_error.h"
38#include "xfs_quota.h" 35#include "xfs_quota.h"
39#include "xfs_rw.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_utils.h" 37#include "xfs_utils.h"
42 38
@@ -60,7 +56,6 @@ xfs_dir_ialloc(
60 mode_t mode, 56 mode_t mode,
61 xfs_nlink_t nlink, 57 xfs_nlink_t nlink,
62 xfs_dev_t rdev, 58 xfs_dev_t rdev,
63 cred_t *credp,
64 prid_t prid, /* project id */ 59 prid_t prid, /* project id */
65 int okalloc, /* ok to allocate new space */ 60 int okalloc, /* ok to allocate new space */
66 xfs_inode_t **ipp, /* pointer to inode; it will be 61 xfs_inode_t **ipp, /* pointer to inode; it will be
@@ -97,7 +92,7 @@ xfs_dir_ialloc(
97 * transaction commit so that no other process can steal 92 * transaction commit so that no other process can steal
98 * the inode(s) that we've just allocated. 93 * the inode(s) that we've just allocated.
99 */ 94 */
100 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, 95 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
101 &ialloc_context, &call_again, &ip); 96 &ialloc_context, &call_again, &ip);
102 97
103 /* 98 /*
@@ -201,7 +196,7 @@ xfs_dir_ialloc(
201 * other allocations in this allocation group, 196 * other allocations in this allocation group,
202 * this call should always succeed. 197 * this call should always succeed.
203 */ 198 */
204 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, 199 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
205 okalloc, &ialloc_context, &call_again, &ip); 200 okalloc, &ialloc_context, &call_again, &ip);
206 201
207 /* 202 /*
@@ -239,7 +234,7 @@ xfs_droplink(
239{ 234{
240 int error; 235 int error;
241 236
242 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 237 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
243 238
244 ASSERT (ip->i_d.di_nlink > 0); 239 ASSERT (ip->i_d.di_nlink > 0);
245 ip->i_d.di_nlink--; 240 ip->i_d.di_nlink--;
@@ -303,7 +298,7 @@ xfs_bumplink(
303{ 298{
304 if (ip->i_d.di_nlink >= XFS_MAXLINK) 299 if (ip->i_d.di_nlink >= XFS_MAXLINK)
305 return XFS_ERROR(EMLINK); 300 return XFS_ERROR(EMLINK);
306 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 301 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
307 302
308 ASSERT(ip->i_d.di_nlink > 0); 303 ASSERT(ip->i_d.di_nlink > 0);
309 ip->i_d.di_nlink++; 304 ip->i_d.di_nlink++;
@@ -324,86 +319,3 @@ xfs_bumplink(
324 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 319 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
325 return 0; 320 return 0;
326} 321}
327
328/*
329 * Try to truncate the given file to 0 length. Currently called
330 * only out of xfs_remove when it has to truncate a file to free
331 * up space for the remove to proceed.
332 */
333int
334xfs_truncate_file(
335 xfs_mount_t *mp,
336 xfs_inode_t *ip)
337{
338 xfs_trans_t *tp;
339 int error;
340
341#ifdef QUOTADEBUG
342 /*
343 * This is called to truncate the quotainodes too.
344 */
345 if (XFS_IS_UQUOTA_ON(mp)) {
346 if (ip->i_ino != mp->m_sb.sb_uquotino)
347 ASSERT(ip->i_udquot);
348 }
349 if (XFS_IS_OQUOTA_ON(mp)) {
350 if (ip->i_ino != mp->m_sb.sb_gquotino)
351 ASSERT(ip->i_gdquot);
352 }
353#endif
354 /*
355 * Make the call to xfs_itruncate_start before starting the
356 * transaction, because we cannot make the call while we're
357 * in a transaction.
358 */
359 xfs_ilock(ip, XFS_IOLOCK_EXCL);
360 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0);
361 if (error) {
362 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
363 return error;
364 }
365
366 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
367 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
368 XFS_TRANS_PERM_LOG_RES,
369 XFS_ITRUNCATE_LOG_COUNT))) {
370 xfs_trans_cancel(tp, 0);
371 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
372 return error;
373 }
374
375 /*
376 * Follow the normal truncate locking protocol. Since we
377 * hold the inode in the transaction, we know that its number
378 * of references will stay constant.
379 */
380 xfs_ilock(ip, XFS_ILOCK_EXCL);
381 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
382 xfs_trans_ihold(tp, ip);
383 /*
384 * Signal a sync xaction. The only case where that isn't
385 * the case is if we're truncating an already unlinked file
386 * on a wsync fs. In that case, we know the blocks can't
387 * reappear in the file because the links to file are
388 * permanently toast. Currently, we're always going to
389 * want a sync transaction because this code is being
390 * called from places where nlink is guaranteed to be 1
391 * but I'm leaving the tests in to protect against future
392 * changes -- rcc.
393 */
394 error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0,
395 XFS_DATA_FORK,
396 ((ip->i_d.di_nlink != 0 ||
397 !(mp->m_flags & XFS_MOUNT_WSYNC))
398 ? 1 : 0));
399 if (error) {
400 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
401 XFS_TRANS_ABORT);
402 } else {
403 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
404 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
405 }
406 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
407
408 return error;
409}
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index ef321225d269..456fca314933 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,10 +18,8 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
22extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
23 xfs_dev_t, cred_t *, prid_t, int, 22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
24 xfs_inode_t **, int *);
25extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); 23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
26extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); 24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
27extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); 25extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c1646838898f..8e4a63c4151a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -26,19 +26,14 @@
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_ag.h" 27#include "xfs_ag.h"
28#include "xfs_dir2.h" 28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h" 29#include "xfs_mount.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 32#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 33#include "xfs_dinode.h"
38#include "xfs_inode.h" 34#include "xfs_inode.h"
39#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
40#include "xfs_itable.h" 36#include "xfs_itable.h"
41#include "xfs_btree.h"
42#include "xfs_ialloc.h" 37#include "xfs_ialloc.h"
43#include "xfs_alloc.h" 38#include "xfs_alloc.h"
44#include "xfs_bmap.h" 39#include "xfs_bmap.h"
@@ -73,7 +68,7 @@ xfs_setattr(
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 68 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 69 int need_iolock = 1;
75 70
76 xfs_itrace_entry(ip); 71 trace_xfs_setattr(ip);
77 72
78 if (mp->m_flags & XFS_MOUNT_RDONLY) 73 if (mp->m_flags & XFS_MOUNT_RDONLY)
79 return XFS_ERROR(EROFS); 74 return XFS_ERROR(EROFS);
@@ -119,7 +114,7 @@ xfs_setattr(
119 */ 114 */
120 ASSERT(udqp == NULL); 115 ASSERT(udqp == NULL);
121 ASSERT(gdqp == NULL); 116 ASSERT(gdqp == NULL);
122 code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, 117 code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
123 qflags, &udqp, &gdqp); 118 qflags, &udqp, &gdqp);
124 if (code) 119 if (code)
125 return code; 120 return code;
@@ -143,16 +138,6 @@ xfs_setattr(
143 goto error_return; 138 goto error_return;
144 } 139 }
145 } else { 140 } else {
146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
147 !(flags & XFS_ATTR_DMI)) {
148 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
149 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
150 iattr->ia_size, 0, dmflags, NULL);
151 if (code) {
152 lock_flags = 0;
153 goto error_return;
154 }
155 }
156 if (need_iolock) 141 if (need_iolock)
157 lock_flags |= XFS_IOLOCK_EXCL; 142 lock_flags |= XFS_IOLOCK_EXCL;
158 } 143 }
@@ -199,8 +184,11 @@ xfs_setattr(
199 ip->i_size == 0 && ip->i_d.di_nextents == 0) { 184 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
200 xfs_iunlock(ip, XFS_ILOCK_EXCL); 185 xfs_iunlock(ip, XFS_ILOCK_EXCL);
201 lock_flags &= ~XFS_ILOCK_EXCL; 186 lock_flags &= ~XFS_ILOCK_EXCL;
202 if (mask & ATTR_CTIME) 187 if (mask & ATTR_CTIME) {
203 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 188 inode->i_mtime = inode->i_ctime =
189 current_fs_time(inode->i_sb);
190 xfs_mark_inode_dirty_sync(ip);
191 }
204 code = 0; 192 code = 0;
205 goto error_return; 193 goto error_return;
206 } 194 }
@@ -236,8 +224,11 @@ xfs_setattr(
236 * transaction to modify the i_size. 224 * transaction to modify the i_size.
237 */ 225 */
238 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); 226 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
227 if (code)
228 goto error_return;
239 } 229 }
240 xfs_iunlock(ip, XFS_ILOCK_EXCL); 230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231 lock_flags &= ~XFS_ILOCK_EXCL;
241 232
242 /* 233 /*
243 * We are going to log the inode size change in this 234 * We are going to log the inode size change in this
@@ -251,40 +242,38 @@ xfs_setattr(
251 * really care about here and prevents waiting for other data 242 * really care about here and prevents waiting for other data
252 * not within the range we care about here. 243 * not within the range we care about here.
253 */ 244 */
254 if (!code && 245 if (ip->i_size != ip->i_d.di_size &&
255 ip->i_size != ip->i_d.di_size &&
256 iattr->ia_size > ip->i_d.di_size) { 246 iattr->ia_size > ip->i_d.di_size) {
257 code = xfs_flush_pages(ip, 247 code = xfs_flush_pages(ip,
258 ip->i_d.di_size, iattr->ia_size, 248 ip->i_d.di_size, iattr->ia_size,
259 XBF_ASYNC, FI_NONE); 249 XBF_ASYNC, FI_NONE);
250 if (code)
251 goto error_return;
260 } 252 }
261 253
262 /* wait for all I/O to complete */ 254 /* wait for all I/O to complete */
263 xfs_ioend_wait(ip); 255 xfs_ioend_wait(ip);
264 256
265 if (!code) 257 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
266 code = xfs_itruncate_data(ip, iattr->ia_size); 258 xfs_get_blocks);
267 if (code) { 259 if (code)
268 ASSERT(tp == NULL);
269 lock_flags &= ~XFS_ILOCK_EXCL;
270 ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
271 goto error_return; 260 goto error_return;
272 } 261
273 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 262 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
274 if ((code = xfs_trans_reserve(tp, 0, 263 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
275 XFS_ITRUNCATE_LOG_RES(mp), 0, 264 XFS_TRANS_PERM_LOG_RES,
276 XFS_TRANS_PERM_LOG_RES, 265 XFS_ITRUNCATE_LOG_COUNT);
277 XFS_ITRUNCATE_LOG_COUNT))) { 266 if (code)
278 xfs_trans_cancel(tp, 0); 267 goto error_return;
279 if (need_iolock) 268
280 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 269 truncate_setsize(inode, iattr->ia_size);
281 return code; 270
282 }
283 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 271 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
272 lock_flags |= XFS_ILOCK_EXCL;
273
284 xfs_ilock(ip, XFS_ILOCK_EXCL); 274 xfs_ilock(ip, XFS_ILOCK_EXCL);
285 275
286 xfs_trans_ijoin(tp, ip, lock_flags); 276 xfs_trans_ijoin(tp, ip);
287 xfs_trans_ihold(tp, ip);
288 277
289 /* 278 /*
290 * Only change the c/mtime if we are changing the size 279 * Only change the c/mtime if we are changing the size
@@ -334,8 +323,7 @@ xfs_setattr(
334 xfs_iflags_set(ip, XFS_ITRUNCATED); 323 xfs_iflags_set(ip, XFS_ITRUNCATED);
335 } 324 }
336 } else if (tp) { 325 } else if (tp) {
337 xfs_trans_ijoin(tp, ip, lock_flags); 326 xfs_trans_ijoin(tp, ip);
338 xfs_trans_ihold(tp, ip);
339 } 327 }
340 328
341 /* 329 /*
@@ -470,17 +458,10 @@ xfs_setattr(
470 return XFS_ERROR(code); 458 return XFS_ERROR(code);
471 } 459 }
472 460
473 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
474 !(flags & XFS_ATTR_DMI)) {
475 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
476 NULL, DM_RIGHT_NULL, NULL, NULL,
477 0, 0, AT_DELAY_FLAG(flags));
478 }
479 return 0; 461 return 0;
480 462
481 abort_return: 463 abort_return:
482 commit_flags |= XFS_TRANS_ABORT; 464 commit_flags |= XFS_TRANS_ABORT;
483 /* FALLTHROUGH */
484 error_return: 465 error_return:
485 xfs_qm_dqrele(udqp); 466 xfs_qm_dqrele(udqp);
486 xfs_qm_dqrele(gdqp); 467 xfs_qm_dqrele(gdqp);
@@ -516,7 +497,7 @@ xfs_readlink_bmap(
516 int error = 0; 497 int error = 0;
517 498
518 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 499 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
519 mval, &nmaps, NULL, NULL); 500 mval, &nmaps, NULL);
520 if (error) 501 if (error)
521 goto out; 502 goto out;
522 503
@@ -557,7 +538,7 @@ xfs_readlink(
557 int pathlen; 538 int pathlen;
558 int error = 0; 539 int error = 0;
559 540
560 xfs_itrace_entry(ip); 541 trace_xfs_readlink(ip);
561 542
562 if (XFS_FORCED_SHUTDOWN(mp)) 543 if (XFS_FORCED_SHUTDOWN(mp))
563 return XFS_ERROR(EIO); 544 return XFS_ERROR(EIO);
@@ -613,14 +594,14 @@ xfs_free_eofblocks(
613 */ 594 */
614 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 595 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
615 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 596 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
616 map_len = last_fsb - end_fsb; 597 if (last_fsb <= end_fsb)
617 if (map_len <= 0)
618 return 0; 598 return 0;
599 map_len = last_fsb - end_fsb;
619 600
620 nimaps = 1; 601 nimaps = 1;
621 xfs_ilock(ip, XFS_ILOCK_SHARED); 602 xfs_ilock(ip, XFS_ILOCK_SHARED);
622 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, 603 error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
623 NULL, 0, &imap, &nimaps, NULL, NULL); 604 NULL, 0, &imap, &nimaps, NULL);
624 xfs_iunlock(ip, XFS_ILOCK_SHARED); 605 xfs_iunlock(ip, XFS_ILOCK_SHARED);
625 606
626 if (!error && (nimaps != 0) && 607 if (!error && (nimaps != 0) &&
@@ -675,10 +656,7 @@ xfs_free_eofblocks(
675 } 656 }
676 657
677 xfs_ilock(ip, XFS_ILOCK_EXCL); 658 xfs_ilock(ip, XFS_ILOCK_EXCL);
678 xfs_trans_ijoin(tp, ip, 659 xfs_trans_ijoin(tp, ip);
679 XFS_IOLOCK_EXCL |
680 XFS_ILOCK_EXCL);
681 xfs_trans_ihold(tp, ip);
682 660
683 error = xfs_itruncate_finish(&tp, ip, 661 error = xfs_itruncate_finish(&tp, ip,
684 ip->i_size, 662 ip->i_size,
@@ -750,8 +728,7 @@ xfs_inactive_symlink_rmt(
750 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
751 size = (int)ip->i_d.di_size; 729 size = (int)ip->i_d.di_size;
752 ip->i_d.di_size = 0; 730 ip->i_d.di_size = 0;
753 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 731 xfs_trans_ijoin(tp, ip);
754 xfs_trans_ihold(tp, ip);
755 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 732 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
756 /* 733 /*
757 * Find the block(s) so we can inval and unmap them. 734 * Find the block(s) so we can inval and unmap them.
@@ -761,7 +738,7 @@ xfs_inactive_symlink_rmt(
761 nmaps = ARRAY_SIZE(mval); 738 nmaps = ARRAY_SIZE(mval);
762 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 739 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
763 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 740 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
764 &free_list, NULL))) 741 &free_list)))
765 goto error0; 742 goto error0;
766 /* 743 /*
767 * Invalidate the block(s). 744 * Invalidate the block(s).
@@ -776,7 +753,7 @@ xfs_inactive_symlink_rmt(
776 * Unmap the dead block(s) to the free_list. 753 * Unmap the dead block(s) to the free_list.
777 */ 754 */
778 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 755 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
779 &first_block, &free_list, NULL, &done))) 756 &first_block, &free_list, &done)))
780 goto error1; 757 goto error1;
781 ASSERT(done); 758 ASSERT(done);
782 /* 759 /*
@@ -795,8 +772,7 @@ xfs_inactive_symlink_rmt(
795 * Mark it dirty so it will be logged and moved forward in the log as 772 * Mark it dirty so it will be logged and moved forward in the log as
796 * part of every commit. 773 * part of every commit.
797 */ 774 */
798 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 775 xfs_trans_ijoin(tp, ip);
799 xfs_trans_ihold(tp, ip);
800 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 776 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
801 /* 777 /*
802 * Get a new, empty transaction to return to our caller. 778 * Get a new, empty transaction to return to our caller.
@@ -929,8 +905,7 @@ xfs_inactive_attrs(
929 goto error_cancel; 905 goto error_cancel;
930 906
931 xfs_ilock(ip, XFS_ILOCK_EXCL); 907 xfs_ilock(ip, XFS_ILOCK_EXCL);
932 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 908 xfs_trans_ijoin(tp, ip);
933 xfs_trans_ihold(tp, ip);
934 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 909 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
935 910
936 ASSERT(ip->i_d.di_anextents == 0); 911 ASSERT(ip->i_d.di_anextents == 0);
@@ -1035,8 +1010,6 @@ xfs_inactive(
1035 int error; 1010 int error;
1036 int truncate; 1011 int truncate;
1037 1012
1038 xfs_itrace_entry(ip);
1039
1040 /* 1013 /*
1041 * If the inode is already free, then there can be nothing 1014 * If the inode is already free, then there can be nothing
1042 * to clean up here. 1015 * to clean up here.
@@ -1060,9 +1033,6 @@ xfs_inactive(
1060 1033
1061 mp = ip->i_mount; 1034 mp = ip->i_mount;
1062 1035
1063 if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
1064 XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
1065
1066 error = 0; 1036 error = 0;
1067 1037
1068 /* If this is a read-only mount, don't do this (would generate I/O) */ 1038 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1120,8 +1090,7 @@ xfs_inactive(
1120 } 1090 }
1121 1091
1122 xfs_ilock(ip, XFS_ILOCK_EXCL); 1092 xfs_ilock(ip, XFS_ILOCK_EXCL);
1123 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1093 xfs_trans_ijoin(tp, ip);
1124 xfs_trans_ihold(tp, ip);
1125 1094
1126 /* 1095 /*
1127 * normally, we have to run xfs_itruncate_finish sync. 1096 * normally, we have to run xfs_itruncate_finish sync.
@@ -1154,8 +1123,7 @@ xfs_inactive(
1154 return VN_INACTIVE_CACHE; 1123 return VN_INACTIVE_CACHE;
1155 } 1124 }
1156 1125
1157 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1126 xfs_trans_ijoin(tp, ip);
1158 xfs_trans_ihold(tp, ip);
1159 } else { 1127 } else {
1160 error = xfs_trans_reserve(tp, 0, 1128 error = xfs_trans_reserve(tp, 0,
1161 XFS_IFREE_LOG_RES(mp), 1129 XFS_IFREE_LOG_RES(mp),
@@ -1168,8 +1136,7 @@ xfs_inactive(
1168 } 1136 }
1169 1137
1170 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1138 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1171 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1139 xfs_trans_ijoin(tp, ip);
1172 xfs_trans_ihold(tp, ip);
1173 } 1140 }
1174 1141
1175 /* 1142 /*
@@ -1257,7 +1224,7 @@ xfs_lookup(
1257 int error; 1224 int error;
1258 uint lock_mode; 1225 uint lock_mode;
1259 1226
1260 xfs_itrace_entry(dp); 1227 trace_xfs_lookup(dp, name);
1261 1228
1262 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 1229 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1263 return XFS_ERROR(EIO); 1230 return XFS_ERROR(EIO);
@@ -1289,8 +1256,7 @@ xfs_create(
1289 struct xfs_name *name, 1256 struct xfs_name *name,
1290 mode_t mode, 1257 mode_t mode,
1291 xfs_dev_t rdev, 1258 xfs_dev_t rdev,
1292 xfs_inode_t **ipp, 1259 xfs_inode_t **ipp)
1293 cred_t *credp)
1294{ 1260{
1295 int is_dir = S_ISDIR(mode); 1261 int is_dir = S_ISDIR(mode);
1296 struct xfs_mount *mp = dp->i_mount; 1262 struct xfs_mount *mp = dp->i_mount;
@@ -1302,32 +1268,22 @@ xfs_create(
1302 boolean_t unlock_dp_on_error = B_FALSE; 1268 boolean_t unlock_dp_on_error = B_FALSE;
1303 uint cancel_flags; 1269 uint cancel_flags;
1304 int committed; 1270 int committed;
1305 xfs_prid_t prid; 1271 prid_t prid;
1306 struct xfs_dquot *udqp = NULL; 1272 struct xfs_dquot *udqp = NULL;
1307 struct xfs_dquot *gdqp = NULL; 1273 struct xfs_dquot *gdqp = NULL;
1308 uint resblks; 1274 uint resblks;
1309 uint log_res; 1275 uint log_res;
1310 uint log_count; 1276 uint log_count;
1311 1277
1312 xfs_itrace_entry(dp); 1278 trace_xfs_create(dp, name);
1313 1279
1314 if (XFS_FORCED_SHUTDOWN(mp)) 1280 if (XFS_FORCED_SHUTDOWN(mp))
1315 return XFS_ERROR(EIO); 1281 return XFS_ERROR(EIO);
1316 1282
1317 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1318 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1319 dp, DM_RIGHT_NULL, NULL,
1320 DM_RIGHT_NULL, name->name, NULL,
1321 mode, 0, 0);
1322
1323 if (error)
1324 return error;
1325 }
1326
1327 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1283 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1328 prid = dp->i_d.di_projid; 1284 prid = xfs_get_projid(dp);
1329 else 1285 else
1330 prid = dfltprid; 1286 prid = XFS_PROJID_DEFAULT;
1331 1287
1332 /* 1288 /*
1333 * Make sure that we have allocated dquot(s) on disk. 1289 * Make sure that we have allocated dquot(s) on disk.
@@ -1406,7 +1362,7 @@ xfs_create(
1406 * entry pointing to them, but a directory also the "." entry 1362 * entry pointing to them, but a directory also the "." entry
1407 * pointing to itself. 1363 * pointing to itself.
1408 */ 1364 */
1409 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, 1365 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
1410 prid, resblks > 0, &ip, &committed); 1366 prid, resblks > 0, &ip, &committed);
1411 if (error) { 1367 if (error) {
1412 if (error == ENOSPC) 1368 if (error == ENOSPC)
@@ -1427,8 +1383,7 @@ xfs_create(
1427 * the transaction cancel unlocking dp so don't do it explicitly in the 1383 * the transaction cancel unlocking dp so don't do it explicitly in the
1428 * error path. 1384 * error path.
1429 */ 1385 */
1430 IHOLD(dp); 1386 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1431 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1432 unlock_dp_on_error = B_FALSE; 1387 unlock_dp_on_error = B_FALSE;
1433 1388
1434 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1389 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1438,7 +1393,7 @@ xfs_create(
1438 ASSERT(error != ENOSPC); 1393 ASSERT(error != ENOSPC);
1439 goto out_trans_abort; 1394 goto out_trans_abort;
1440 } 1395 }
1441 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1396 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1442 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1397 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1443 1398
1444 if (is_dir) { 1399 if (is_dir) {
@@ -1487,16 +1442,7 @@ xfs_create(
1487 xfs_qm_dqrele(gdqp); 1442 xfs_qm_dqrele(gdqp);
1488 1443
1489 *ipp = ip; 1444 *ipp = ip;
1490 1445 return 0;
1491 /* Fallthrough to std_return with error = 0 */
1492 std_return:
1493 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1494 XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL,
1495 ip, DM_RIGHT_NULL, name->name, NULL, mode,
1496 error, 0);
1497 }
1498
1499 return error;
1500 1446
1501 out_bmap_cancel: 1447 out_bmap_cancel:
1502 xfs_bmap_cancel(&free_list); 1448 xfs_bmap_cancel(&free_list);
@@ -1510,8 +1456,8 @@ xfs_create(
1510 1456
1511 if (unlock_dp_on_error) 1457 if (unlock_dp_on_error)
1512 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1458 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1513 1459 std_return:
1514 goto std_return; 1460 return error;
1515 1461
1516 out_abort_rele: 1462 out_abort_rele:
1517 /* 1463 /*
@@ -1726,20 +1672,11 @@ xfs_remove(
1726 uint resblks; 1672 uint resblks;
1727 uint log_count; 1673 uint log_count;
1728 1674
1729 xfs_itrace_entry(dp); 1675 trace_xfs_remove(dp, name);
1730 xfs_itrace_entry(ip);
1731 1676
1732 if (XFS_FORCED_SHUTDOWN(mp)) 1677 if (XFS_FORCED_SHUTDOWN(mp))
1733 return XFS_ERROR(EIO); 1678 return XFS_ERROR(EIO);
1734 1679
1735 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
1736 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
1737 NULL, DM_RIGHT_NULL, name->name, NULL,
1738 ip->i_d.di_mode, 0, 0);
1739 if (error)
1740 return error;
1741 }
1742
1743 error = xfs_qm_dqattach(dp, 0); 1680 error = xfs_qm_dqattach(dp, 0);
1744 if (error) 1681 if (error)
1745 goto std_return; 1682 goto std_return;
@@ -1782,15 +1719,8 @@ xfs_remove(
1782 1719
1783 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 1720 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
1784 1721
1785 /* 1722 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
1786 * At this point, we've gotten both the directory and the entry 1723 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1787 * inodes locked.
1788 */
1789 IHOLD(ip);
1790 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1791
1792 IHOLD(dp);
1793 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1794 1724
1795 /* 1725 /*
1796 * If we're removing a directory perform some additional validation. 1726 * If we're removing a directory perform some additional validation.
@@ -1814,7 +1744,7 @@ xfs_remove(
1814 ASSERT(error != ENOENT); 1744 ASSERT(error != ENOENT);
1815 goto out_bmap_cancel; 1745 goto out_bmap_cancel;
1816 } 1746 }
1817 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1747 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1818 1748
1819 if (is_dir) { 1749 if (is_dir) {
1820 /* 1750 /*
@@ -1877,21 +1807,15 @@ xfs_remove(
1877 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1807 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1878 xfs_filestream_deassociate(ip); 1808 xfs_filestream_deassociate(ip);
1879 1809
1880 std_return: 1810 return 0;
1881 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1882 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
1883 NULL, DM_RIGHT_NULL, name->name, NULL,
1884 ip->i_d.di_mode, error, 0);
1885 }
1886
1887 return error;
1888 1811
1889 out_bmap_cancel: 1812 out_bmap_cancel:
1890 xfs_bmap_cancel(&free_list); 1813 xfs_bmap_cancel(&free_list);
1891 cancel_flags |= XFS_TRANS_ABORT; 1814 cancel_flags |= XFS_TRANS_ABORT;
1892 out_trans_cancel: 1815 out_trans_cancel:
1893 xfs_trans_cancel(tp, cancel_flags); 1816 xfs_trans_cancel(tp, cancel_flags);
1894 goto std_return; 1817 std_return:
1818 return error;
1895} 1819}
1896 1820
1897int 1821int
@@ -1909,25 +1833,13 @@ xfs_link(
1909 int committed; 1833 int committed;
1910 int resblks; 1834 int resblks;
1911 1835
1912 xfs_itrace_entry(tdp); 1836 trace_xfs_link(tdp, target_name);
1913 xfs_itrace_entry(sip);
1914 1837
1915 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1838 ASSERT(!S_ISDIR(sip->i_d.di_mode));
1916 1839
1917 if (XFS_FORCED_SHUTDOWN(mp)) 1840 if (XFS_FORCED_SHUTDOWN(mp))
1918 return XFS_ERROR(EIO); 1841 return XFS_ERROR(EIO);
1919 1842
1920 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
1921 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
1922 tdp, DM_RIGHT_NULL,
1923 sip, DM_RIGHT_NULL,
1924 target_name->name, NULL, 0, 0, 0);
1925 if (error)
1926 return error;
1927 }
1928
1929 /* Return through std_return after this point. */
1930
1931 error = xfs_qm_dqattach(sip, 0); 1843 error = xfs_qm_dqattach(sip, 0);
1932 if (error) 1844 if (error)
1933 goto std_return; 1845 goto std_return;
@@ -1953,15 +1865,8 @@ xfs_link(
1953 1865
1954 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1866 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1955 1867
1956 /* 1868 xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
1957 * Increment vnode ref counts since xfs_trans_commit & 1869 xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
1958 * xfs_trans_cancel will both unlock the inodes and
1959 * decrement the associated ref counts.
1960 */
1961 IHOLD(sip);
1962 IHOLD(tdp);
1963 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1964 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1965 1870
1966 /* 1871 /*
1967 * If the source has too many links, we can't make any more to it. 1872 * If the source has too many links, we can't make any more to it.
@@ -1977,7 +1882,7 @@ xfs_link(
1977 * the tree quota mechanism could be circumvented. 1882 * the tree quota mechanism could be circumvented.
1978 */ 1883 */
1979 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1884 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1980 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 1885 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1981 error = XFS_ERROR(EXDEV); 1886 error = XFS_ERROR(EXDEV);
1982 goto error_return; 1887 goto error_return;
1983 } 1888 }
@@ -1992,7 +1897,7 @@ xfs_link(
1992 &first_block, &free_list, resblks); 1897 &first_block, &free_list, resblks);
1993 if (error) 1898 if (error)
1994 goto abort_return; 1899 goto abort_return;
1995 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1900 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1996 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 1901 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1997 1902
1998 error = xfs_bumplink(tp, sip); 1903 error = xfs_bumplink(tp, sip);
@@ -2014,27 +1919,14 @@ xfs_link(
2014 goto abort_return; 1919 goto abort_return;
2015 } 1920 }
2016 1921
2017 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1922 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2018 if (error)
2019 goto std_return;
2020
2021 /* Fall through to std_return with error = 0. */
2022std_return:
2023 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
2024 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2025 tdp, DM_RIGHT_NULL,
2026 sip, DM_RIGHT_NULL,
2027 target_name->name, NULL, 0, error, 0);
2028 }
2029 return error;
2030 1923
2031 abort_return: 1924 abort_return:
2032 cancel_flags |= XFS_TRANS_ABORT; 1925 cancel_flags |= XFS_TRANS_ABORT;
2033 /* FALLTHROUGH */
2034
2035 error_return: 1926 error_return:
2036 xfs_trans_cancel(tp, cancel_flags); 1927 xfs_trans_cancel(tp, cancel_flags);
2037 goto std_return; 1928 std_return:
1929 return error;
2038} 1930}
2039 1931
2040int 1932int
@@ -2043,8 +1935,7 @@ xfs_symlink(
2043 struct xfs_name *link_name, 1935 struct xfs_name *link_name,
2044 const char *target_path, 1936 const char *target_path,
2045 mode_t mode, 1937 mode_t mode,
2046 xfs_inode_t **ipp, 1938 xfs_inode_t **ipp)
2047 cred_t *credp)
2048{ 1939{
2049 xfs_mount_t *mp = dp->i_mount; 1940 xfs_mount_t *mp = dp->i_mount;
2050 xfs_trans_t *tp; 1941 xfs_trans_t *tp;
@@ -2065,7 +1956,7 @@ xfs_symlink(
2065 int byte_cnt; 1956 int byte_cnt;
2066 int n; 1957 int n;
2067 xfs_buf_t *bp; 1958 xfs_buf_t *bp;
2068 xfs_prid_t prid; 1959 prid_t prid;
2069 struct xfs_dquot *udqp, *gdqp; 1960 struct xfs_dquot *udqp, *gdqp;
2070 uint resblks; 1961 uint resblks;
2071 1962
@@ -2074,7 +1965,7 @@ xfs_symlink(
2074 ip = NULL; 1965 ip = NULL;
2075 tp = NULL; 1966 tp = NULL;
2076 1967
2077 xfs_itrace_entry(dp); 1968 trace_xfs_symlink(dp, link_name);
2078 1969
2079 if (XFS_FORCED_SHUTDOWN(mp)) 1970 if (XFS_FORCED_SHUTDOWN(mp))
2080 return XFS_ERROR(EIO); 1971 return XFS_ERROR(EIO);
@@ -2086,22 +1977,11 @@ xfs_symlink(
2086 if (pathlen >= MAXPATHLEN) /* total string too long */ 1977 if (pathlen >= MAXPATHLEN) /* total string too long */
2087 return XFS_ERROR(ENAMETOOLONG); 1978 return XFS_ERROR(ENAMETOOLONG);
2088 1979
2089 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2090 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2091 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2092 link_name->name,
2093 (unsigned char *)target_path, 0, 0, 0);
2094 if (error)
2095 return error;
2096 }
2097
2098 /* Return through std_return after this point. */
2099
2100 udqp = gdqp = NULL; 1980 udqp = gdqp = NULL;
2101 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1981 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2102 prid = dp->i_d.di_projid; 1982 prid = xfs_get_projid(dp);
2103 else 1983 else
2104 prid = (xfs_prid_t)dfltprid; 1984 prid = XFS_PROJID_DEFAULT;
2105 1985
2106 /* 1986 /*
2107 * Make sure that we have allocated dquot(s) on disk. 1987 * Make sure that we have allocated dquot(s) on disk.
@@ -2167,8 +2047,8 @@ xfs_symlink(
2167 /* 2047 /*
2168 * Allocate an inode for the symlink. 2048 * Allocate an inode for the symlink.
2169 */ 2049 */
2170 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 2050 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
2171 1, 0, credp, prid, resblks > 0, &ip, NULL); 2051 prid, resblks > 0, &ip, NULL);
2172 if (error) { 2052 if (error) {
2173 if (error == ENOSPC) 2053 if (error == ENOSPC)
2174 goto error_return; 2054 goto error_return;
@@ -2180,8 +2060,7 @@ xfs_symlink(
2180 * transaction cancel unlocking dp so don't do it explicitly in the 2060 * transaction cancel unlocking dp so don't do it explicitly in the
2181 * error path. 2061 * error path.
2182 */ 2062 */
2183 IHOLD(dp); 2063 xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
2184 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2185 unlock_dp_on_error = B_FALSE; 2064 unlock_dp_on_error = B_FALSE;
2186 2065
2187 /* 2066 /*
@@ -2215,7 +2094,7 @@ xfs_symlink(
2215 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 2094 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
2216 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2095 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2217 &first_block, resblks, mval, &nmaps, 2096 &first_block, resblks, mval, &nmaps,
2218 &free_list, NULL); 2097 &free_list);
2219 if (error) { 2098 if (error) {
2220 goto error1; 2099 goto error1;
2221 } 2100 }
@@ -2251,7 +2130,7 @@ xfs_symlink(
2251 &first_block, &free_list, resblks); 2130 &first_block, &free_list, resblks);
2252 if (error) 2131 if (error)
2253 goto error1; 2132 goto error1;
2254 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2133 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2255 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2134 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2256 2135
2257 /* 2136 /*
@@ -2278,21 +2157,8 @@ xfs_symlink(
2278 xfs_qm_dqrele(udqp); 2157 xfs_qm_dqrele(udqp);
2279 xfs_qm_dqrele(gdqp); 2158 xfs_qm_dqrele(gdqp);
2280 2159
2281 /* Fall through to std_return with error = 0 or errno from 2160 *ipp = ip;
2282 * xfs_trans_commit */ 2161 return 0;
2283std_return:
2284 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
2285 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
2286 dp, DM_RIGHT_NULL,
2287 error ? NULL : ip,
2288 DM_RIGHT_NULL, link_name->name,
2289 (unsigned char *)target_path,
2290 0, error, 0);
2291 }
2292
2293 if (!error)
2294 *ipp = ip;
2295 return error;
2296 2162
2297 error2: 2163 error2:
2298 IRELE(ip); 2164 IRELE(ip);
@@ -2306,8 +2172,8 @@ std_return:
2306 2172
2307 if (unlock_dp_on_error) 2173 if (unlock_dp_on_error)
2308 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2174 xfs_iunlock(dp, XFS_ILOCK_EXCL);
2309 2175 std_return:
2310 goto std_return; 2176 return error;
2311} 2177}
2312 2178
2313int 2179int
@@ -2333,13 +2199,12 @@ xfs_set_dmattrs(
2333 return error; 2199 return error;
2334 } 2200 }
2335 xfs_ilock(ip, XFS_ILOCK_EXCL); 2201 xfs_ilock(ip, XFS_ILOCK_EXCL);
2336 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2202 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
2337 2203
2338 ip->i_d.di_dmevmask = evmask; 2204 ip->i_d.di_dmevmask = evmask;
2339 ip->i_d.di_dmstate = state; 2205 ip->i_d.di_dmstate = state;
2340 2206
2341 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2207 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2342 IHOLD(ip);
2343 error = xfs_trans_commit(tp, 0); 2208 error = xfs_trans_commit(tp, 0);
2344 2209
2345 return error; 2210 return error;
@@ -2390,7 +2255,7 @@ xfs_alloc_file_space(
2390 int committed; 2255 int committed;
2391 int error; 2256 int error;
2392 2257
2393 xfs_itrace_entry(ip); 2258 trace_xfs_alloc_file_space(ip);
2394 2259
2395 if (XFS_FORCED_SHUTDOWN(mp)) 2260 if (XFS_FORCED_SHUTDOWN(mp))
2396 return XFS_ERROR(EIO); 2261 return XFS_ERROR(EIO);
@@ -2408,29 +2273,13 @@ xfs_alloc_file_space(
2408 count = len; 2273 count = len;
2409 imapp = &imaps[0]; 2274 imapp = &imaps[0];
2410 nimaps = 1; 2275 nimaps = 1;
2411 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 2276 bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
2412 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 2277 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
2413 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 2278 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
2414 2279
2415 /* Generate a DMAPI event if needed. */
2416 if (alloc_type != 0 && offset < ip->i_size &&
2417 (attr_flags & XFS_ATTR_DMI) == 0 &&
2418 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2419 xfs_off_t end_dmi_offset;
2420
2421 end_dmi_offset = offset+len;
2422 if (end_dmi_offset > ip->i_size)
2423 end_dmi_offset = ip->i_size;
2424 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
2425 end_dmi_offset - offset, 0, NULL);
2426 if (error)
2427 return error;
2428 }
2429
2430 /* 2280 /*
2431 * Allocate file space until done or until there is an error 2281 * Allocate file space until done or until there is an error
2432 */ 2282 */
2433retry:
2434 while (allocatesize_fsb && !error) { 2283 while (allocatesize_fsb && !error) {
2435 xfs_fileoff_t s, e; 2284 xfs_fileoff_t s, e;
2436 2285
@@ -2451,15 +2300,22 @@ retry:
2451 e = allocatesize_fsb; 2300 e = allocatesize_fsb;
2452 } 2301 }
2453 2302
2303 /*
2304 * The transaction reservation is limited to a 32-bit block
2305 * count, hence we need to limit the number of blocks we are
2306 * trying to reserve to avoid an overflow. We can't allocate
2307 * more than @nimaps extents, and an extent is limited on disk
2308 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
2309 */
2310 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
2454 if (unlikely(rt)) { 2311 if (unlikely(rt)) {
2455 resrtextents = qblocks = (uint)(e - s); 2312 resrtextents = qblocks = resblks;
2456 resrtextents /= mp->m_sb.sb_rextsize; 2313 resrtextents /= mp->m_sb.sb_rextsize;
2457 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 2314 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
2458 quota_flag = XFS_QMOPT_RES_RTBLKS; 2315 quota_flag = XFS_QMOPT_RES_RTBLKS;
2459 } else { 2316 } else {
2460 resrtextents = 0; 2317 resrtextents = 0;
2461 resblks = qblocks = \ 2318 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
2462 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
2463 quota_flag = XFS_QMOPT_RES_REGBLKS; 2319 quota_flag = XFS_QMOPT_RES_REGBLKS;
2464 } 2320 }
2465 2321
@@ -2488,8 +2344,7 @@ retry:
2488 if (error) 2344 if (error)
2489 goto error1; 2345 goto error1;
2490 2346
2491 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2347 xfs_trans_ijoin(tp, ip);
2492 xfs_trans_ihold(tp, ip);
2493 2348
2494 /* 2349 /*
2495 * Issue the xfs_bmapi() call to allocate the blocks 2350 * Issue the xfs_bmapi() call to allocate the blocks
@@ -2498,7 +2353,7 @@ retry:
2498 error = xfs_bmapi(tp, ip, startoffset_fsb, 2353 error = xfs_bmapi(tp, ip, startoffset_fsb,
2499 allocatesize_fsb, bmapi_flag, 2354 allocatesize_fsb, bmapi_flag,
2500 &firstfsb, 0, imapp, &nimaps, 2355 &firstfsb, 0, imapp, &nimaps,
2501 &free_list, NULL); 2356 &free_list);
2502 if (error) { 2357 if (error) {
2503 goto error0; 2358 goto error0;
2504 } 2359 }
@@ -2527,17 +2382,6 @@ retry:
2527 startoffset_fsb += allocated_fsb; 2382 startoffset_fsb += allocated_fsb;
2528 allocatesize_fsb -= allocated_fsb; 2383 allocatesize_fsb -= allocated_fsb;
2529 } 2384 }
2530dmapi_enospc_check:
2531 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
2532 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
2533 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
2534 ip, DM_RIGHT_NULL,
2535 ip, DM_RIGHT_NULL,
2536 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
2537 if (error == 0)
2538 goto retry; /* Maybe DMAPI app. has made space */
2539 /* else fall through with error from XFS_SEND_DATA */
2540 }
2541 2385
2542 return error; 2386 return error;
2543 2387
@@ -2548,7 +2392,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
2548error1: /* Just cancel transaction */ 2392error1: /* Just cancel transaction */
2549 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2393 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
2550 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2394 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2551 goto dmapi_enospc_check; 2395 return error;
2552} 2396}
2553 2397
2554/* 2398/*
@@ -2588,9 +2432,9 @@ xfs_zero_remaining_bytes(
2588 if (endoff > ip->i_size) 2432 if (endoff > ip->i_size)
2589 endoff = ip->i_size; 2433 endoff = ip->i_size;
2590 2434
2591 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 2435 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
2592 XFS_IS_REALTIME_INODE(ip) ? 2436 mp->m_rtdev_targp : mp->m_ddev_targp,
2593 mp->m_rtdev_targp : mp->m_ddev_targp); 2437 mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
2594 if (!bp) 2438 if (!bp)
2595 return XFS_ERROR(ENOMEM); 2439 return XFS_ERROR(ENOMEM);
2596 2440
@@ -2598,7 +2442,7 @@ xfs_zero_remaining_bytes(
2598 offset_fsb = XFS_B_TO_FSBT(mp, offset); 2442 offset_fsb = XFS_B_TO_FSBT(mp, offset);
2599 nimap = 1; 2443 nimap = 1;
2600 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, 2444 error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
2601 NULL, 0, &imap, &nimap, NULL, NULL); 2445 NULL, 0, &imap, &nimap, NULL);
2602 if (error || nimap < 1) 2446 if (error || nimap < 1)
2603 break; 2447 break;
2604 ASSERT(imap.br_blockcount >= 1); 2448 ASSERT(imap.br_blockcount >= 1);
@@ -2616,7 +2460,7 @@ xfs_zero_remaining_bytes(
2616 XFS_BUF_READ(bp); 2460 XFS_BUF_READ(bp);
2617 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 2461 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
2618 xfsbdstrat(mp, bp); 2462 xfsbdstrat(mp, bp);
2619 error = xfs_iowait(bp); 2463 error = xfs_buf_iowait(bp);
2620 if (error) { 2464 if (error) {
2621 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 2465 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
2622 mp, bp, XFS_BUF_ADDR(bp)); 2466 mp, bp, XFS_BUF_ADDR(bp));
@@ -2629,7 +2473,7 @@ xfs_zero_remaining_bytes(
2629 XFS_BUF_UNREAD(bp); 2473 XFS_BUF_UNREAD(bp);
2630 XFS_BUF_WRITE(bp); 2474 XFS_BUF_WRITE(bp);
2631 xfsbdstrat(mp, bp); 2475 xfsbdstrat(mp, bp);
2632 error = xfs_iowait(bp); 2476 error = xfs_buf_iowait(bp);
2633 if (error) { 2477 if (error) {
2634 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 2478 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
2635 mp, bp, XFS_BUF_ADDR(bp)); 2479 mp, bp, XFS_BUF_ADDR(bp));
@@ -2661,7 +2505,6 @@ xfs_free_file_space(
2661{ 2505{
2662 int committed; 2506 int committed;
2663 int done; 2507 int done;
2664 xfs_off_t end_dmi_offset;
2665 xfs_fileoff_t endoffset_fsb; 2508 xfs_fileoff_t endoffset_fsb;
2666 int error; 2509 int error;
2667 xfs_fsblock_t firstfsb; 2510 xfs_fsblock_t firstfsb;
@@ -2680,7 +2523,7 @@ xfs_free_file_space(
2680 2523
2681 mp = ip->i_mount; 2524 mp = ip->i_mount;
2682 2525
2683 xfs_itrace_entry(ip); 2526 trace_xfs_free_file_space(ip);
2684 2527
2685 error = xfs_qm_dqattach(ip, 0); 2528 error = xfs_qm_dqattach(ip, 0);
2686 if (error) 2529 if (error)
@@ -2691,19 +2534,7 @@ xfs_free_file_space(
2691 return error; 2534 return error;
2692 rt = XFS_IS_REALTIME_INODE(ip); 2535 rt = XFS_IS_REALTIME_INODE(ip);
2693 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 2536 startoffset_fsb = XFS_B_TO_FSB(mp, offset);
2694 end_dmi_offset = offset + len; 2537 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
2695 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
2696
2697 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
2698 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
2699 if (end_dmi_offset > ip->i_size)
2700 end_dmi_offset = ip->i_size;
2701 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
2702 offset, end_dmi_offset - offset,
2703 AT_DELAY_FLAG(attr_flags), NULL);
2704 if (error)
2705 return error;
2706 }
2707 2538
2708 if (attr_flags & XFS_ATTR_NOLOCK) 2539 if (attr_flags & XFS_ATTR_NOLOCK)
2709 need_iolock = 0; 2540 need_iolock = 0;
@@ -2731,7 +2562,7 @@ xfs_free_file_space(
2731 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 2562 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
2732 nimap = 1; 2563 nimap = 1;
2733 error = xfs_bmapi(NULL, ip, startoffset_fsb, 2564 error = xfs_bmapi(NULL, ip, startoffset_fsb,
2734 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2565 1, 0, NULL, 0, &imap, &nimap, NULL);
2735 if (error) 2566 if (error)
2736 goto out_unlock_iolock; 2567 goto out_unlock_iolock;
2737 ASSERT(nimap == 0 || nimap == 1); 2568 ASSERT(nimap == 0 || nimap == 1);
@@ -2746,7 +2577,7 @@ xfs_free_file_space(
2746 } 2577 }
2747 nimap = 1; 2578 nimap = 1;
2748 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 2579 error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
2749 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 2580 1, 0, NULL, 0, &imap, &nimap, NULL);
2750 if (error) 2581 if (error)
2751 goto out_unlock_iolock; 2582 goto out_unlock_iolock;
2752 ASSERT(nimap == 0 || nimap == 1); 2583 ASSERT(nimap == 0 || nimap == 1);
@@ -2814,8 +2645,7 @@ xfs_free_file_space(
2814 if (error) 2645 if (error)
2815 goto error1; 2646 goto error1;
2816 2647
2817 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2648 xfs_trans_ijoin(tp, ip);
2818 xfs_trans_ihold(tp, ip);
2819 2649
2820 /* 2650 /*
2821 * issue the bunmapi() call to free the blocks 2651 * issue the bunmapi() call to free the blocks
@@ -2823,7 +2653,7 @@ xfs_free_file_space(
2823 xfs_bmap_init(&free_list, &firstfsb); 2653 xfs_bmap_init(&free_list, &firstfsb);
2824 error = xfs_bunmapi(tp, ip, startoffset_fsb, 2654 error = xfs_bunmapi(tp, ip, startoffset_fsb,
2825 endoffset_fsb - startoffset_fsb, 2655 endoffset_fsb - startoffset_fsb,
2826 0, 2, &firstfsb, &free_list, NULL, &done); 2656 0, 2, &firstfsb, &free_list, &done);
2827 if (error) { 2657 if (error) {
2828 goto error0; 2658 goto error0;
2829 } 2659 }
@@ -2882,8 +2712,7 @@ xfs_change_file_space(
2882 xfs_off_t llen; 2712 xfs_off_t llen;
2883 xfs_trans_t *tp; 2713 xfs_trans_t *tp;
2884 struct iattr iattr; 2714 struct iattr iattr;
2885 2715 int prealloc_type;
2886 xfs_itrace_entry(ip);
2887 2716
2888 if (!S_ISREG(ip->i_d.di_mode)) 2717 if (!S_ISREG(ip->i_d.di_mode))
2889 return XFS_ERROR(EINVAL); 2718 return XFS_ERROR(EINVAL);
@@ -2926,12 +2755,17 @@ xfs_change_file_space(
2926 * size to be changed. 2755 * size to be changed.
2927 */ 2756 */
2928 setprealloc = clrprealloc = 0; 2757 setprealloc = clrprealloc = 0;
2758 prealloc_type = XFS_BMAPI_PREALLOC;
2929 2759
2930 switch (cmd) { 2760 switch (cmd) {
2761 case XFS_IOC_ZERO_RANGE:
2762 prealloc_type |= XFS_BMAPI_CONVERT;
2763 xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
2764 /* FALLTHRU */
2931 case XFS_IOC_RESVSP: 2765 case XFS_IOC_RESVSP:
2932 case XFS_IOC_RESVSP64: 2766 case XFS_IOC_RESVSP64:
2933 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 2767 error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
2934 1, attr_flags); 2768 prealloc_type, attr_flags);
2935 if (error) 2769 if (error)
2936 return error; 2770 return error;
2937 setprealloc = 1; 2771 setprealloc = 1;
@@ -2985,8 +2819,7 @@ xfs_change_file_space(
2985 2819
2986 xfs_ilock(ip, XFS_ILOCK_EXCL); 2820 xfs_ilock(ip, XFS_ILOCK_EXCL);
2987 2821
2988 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2822 xfs_trans_ijoin(tp, ip);
2989 xfs_trans_ihold(tp, ip);
2990 2823
2991 if ((attr_flags & XFS_ATTR_DMI) == 0) { 2824 if ((attr_flags & XFS_ATTR_DMI) == 0) {
2992 ip->i_d.di_mode &= ~S_ISUID; 2825 ip->i_d.di_mode &= ~S_ISUID;
@@ -3001,7 +2834,7 @@ xfs_change_file_space(
3001 if (ip->i_d.di_mode & S_IXGRP) 2834 if (ip->i_d.di_mode & S_IXGRP)
3002 ip->i_d.di_mode &= ~S_ISGID; 2835 ip->i_d.di_mode &= ~S_ISGID;
3003 2836
3004 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2837 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3005 } 2838 }
3006 if (setprealloc) 2839 if (setprealloc)
3007 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 2840 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index d8dfa8d0dadd..f6702927eee4 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,7 +2,6 @@
2#define _XFS_VNODEOPS_H 1 2#define _XFS_VNODEOPS_H 1
3 3
4struct attrlist_cursor_kern; 4struct attrlist_cursor_kern;
5struct cred;
6struct file; 5struct file;
7struct iattr; 6struct iattr;
8struct inode; 7struct inode;
@@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip);
26int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 25int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
27 struct xfs_inode **ipp, struct xfs_name *ci_name); 26 struct xfs_inode **ipp, struct xfs_name *ci_name);
28int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 27int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
29 xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); 28 xfs_dev_t rdev, struct xfs_inode **ipp);
30int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 29int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
31 struct xfs_inode *ip); 30 struct xfs_inode *ip);
32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 31int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
34int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 33int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
35 xfs_off_t *offset, filldir_t filldir); 34 xfs_off_t *offset, filldir_t filldir);
36int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 35int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
37 const char *target_path, mode_t mode, struct xfs_inode **ipp, 36 const char *target_path, mode_t mode, struct xfs_inode **ipp);
38 cred_t *credp);
39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 37int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
40int xfs_change_file_space(struct xfs_inode *ip, int cmd, 38int xfs_change_file_space(struct xfs_inode *ip, int cmd,
41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 39 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);