Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (36 commits)
  xfs: semaphore cleanup
  xfs: Extend project quotas to support 32bit project ids
  xfs: remove xfs_buf wrappers
  xfs: remove xfs_cred.h
  xfs: remove xfs_globals.h
  xfs: remove xfs_version.h
  xfs: remove xfs_refcache.h
  xfs: fix the xfs_trans_committed
  xfs: remove unused t_callback field in struct xfs_trans
  xfs: fix bogus m_maxagi check in xfs_iget
  xfs: do not use xfs_mod_incore_sb_batch for per-cpu counters
  xfs: do not use xfs_mod_incore_sb for per-cpu counters
  xfs: remove XFS_MOUNT_NO_PERCPU_SB
  xfs: pack xfs_buf structure more tightly
  xfs: convert buffer cache hash to rbtree
  xfs: serialise inode reclaim within an AG
  xfs: batch inode reclaim lookup
  xfs: implement batched inode lookups for AG walking
  xfs: split out inode walk inode grabbing
  xfs: split inode AG walking into separate code for reclaim
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:32:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:32:27 -0400
commit: 5fe3a5ae5c09d53b2b3c7a971e1d87ab3a747055 (patch)
tree: 1e0d3e10c83e456a1678c4e01acb5ff624129202
parent: 0fc0531e0a2174377a86fd6953ecaa00287d8f70 (diff)
parent: 39dc948c6921169e13224a97fa53188922acfde8 (diff)
60 files changed, 1185 insertions, 1375 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f3ccaec5760a..ba5312802aa9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -188,8 +188,8 @@ _xfs_buf_initialize(
        atomic_set(&bp->b_hold, 1);
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_list);
-        INIT_LIST_HEAD(&bp->b_hash_list);
+        RB_CLEAR_NODE(&bp->b_rbnode);
-        init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+        sema_init(&bp->b_sema, 0); /* held, no waiters */
        XB_SET_OWNER(bp);
        bp->b_target = target;
        bp->b_file_offset = range_base;
@@ -262,8 +262,6 @@ xfs_buf_free(
 {
        trace_xfs_buf_free(bp, _RET_IP_);
-        ASSERT(list_empty(&bp->b_hash_list));
        if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                uint            i;
@@ -422,8 +420,10 @@ _xfs_buf_find(
 {
        xfs_off_t               range_base;
        size_t                  range_length;
-        xfs_bufhash_t           *hash;
+        struct xfs_perag        *pag;
-        xfs_buf_t               *bp, *n;
+        struct rb_node          **rbp;
+        struct rb_node          *parent;
+        xfs_buf_t               *bp;
        range_base = (ioff << BBSHIFT);
        range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
        ASSERT(!(range_length < (1 << btp->bt_sshift)));
        ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-        hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
+        /* get tree root */
+        pag = xfs_perag_get(btp->bt_mount,
-        spin_lock(&hash->bh_lock);
+                                xfs_daddr_to_agno(btp->bt_mount, ioff));
-        list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
+        /* walk tree */
-                ASSERT(btp == bp->b_target);
+        spin_lock(&pag->pag_buf_lock);
-                if (bp->b_file_offset == range_base &&
+        rbp = &pag->pag_buf_tree.rb_node;
-                    bp->b_buffer_length == range_length) {
+        parent = NULL;
+        bp = NULL;
+        while (*rbp) {
+                parent = *rbp;
+                bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+                if (range_base < bp->b_file_offset)
+                        rbp = &(*rbp)->rb_left;
+                else if (range_base > bp->b_file_offset)
+                        rbp = &(*rbp)->rb_right;
+                else {
+                        /*
+                         * found a block offset match. If the range doesn't
+                         * match, the only way this is allowed is if the buffer
+                         * in the cache is stale and the transaction that made
+                         * it stale has not yet committed. i.e. we are
+                         * reallocating a busy extent. Skip this buffer and
+                         * continue searching to the right for an exact match.
+                         */
+                        if (bp->b_buffer_length != range_length) {
+                                ASSERT(bp->b_flags & XBF_STALE);
+                                rbp = &(*rbp)->rb_right;
+                                continue;
+                        }
                        atomic_inc(&bp->b_hold);
                        goto found;
                }
@@ -449,17 +472,21 @@ _xfs_buf_find(
        if (new_bp) {
                _xfs_buf_initialize(new_bp, btp, range_base,
                                range_length, flags);
-                new_bp->b_hash = hash;
+                rb_link_node(&new_bp->b_rbnode, parent, rbp);
-                list_add(&new_bp->b_hash_list, &hash->bh_list);
+                rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+                /* the buffer keeps the perag reference until it is freed */
+                new_bp->b_pag = pag;
+                spin_unlock(&pag->pag_buf_lock);
        } else {
                XFS_STATS_INC(xb_miss_locked);
+                spin_unlock(&pag->pag_buf_lock);
+                xfs_perag_put(pag);
        }
-        spin_unlock(&hash->bh_lock);
        return new_bp;
 found:
-        spin_unlock(&hash->bh_lock);
+        spin_unlock(&pag->pag_buf_lock);
+        xfs_perag_put(pag);
        /* Attempt to get the semaphore without sleeping,
         * if this does not work then we need to drop the
@@ -625,8 +652,7 @@ void
 xfs_buf_readahead(
        xfs_buftarg_t           *target,
        xfs_off_t               ioff,
-        size_t                  isize,
+        size_t                  isize)
-        xfs_buf_flags_t         flags)
 {
        struct backing_dev_info *bdi;
@@ -634,8 +660,42 @@ xfs_buf_readahead(
        if (bdi_read_congested(bdi))
                return;
-        flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
+        xfs_buf_read(target, ioff, isize,
-        xfs_buf_read(target, ioff, isize, flags);
+                     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+        struct xfs_mount        *mp,
+        struct xfs_buftarg      *target,
+        xfs_daddr_t             daddr,
+        size_t                  length,
+        int                     flags)
+{
+        xfs_buf_t               *bp;
+        int                     error;
+        bp = xfs_buf_get_uncached(target, length, flags);
+        if (!bp)
+                return NULL;
+        /* set up the buffer for a read IO */
+        xfs_buf_lock(bp);
+        XFS_BUF_SET_ADDR(bp, daddr);
+        XFS_BUF_READ(bp);
+        XFS_BUF_BUSY(bp);
+        xfsbdstrat(mp, bp);
+        error = xfs_buf_iowait(bp);
+        if (error || bp->b_error) {
+                xfs_buf_relse(bp);
+                return NULL;
+        }
+        return bp;
 }
 xfs_buf_t *
@@ -707,9 +767,10 @@ xfs_buf_associate_memory(
 }
 xfs_buf_t *
-xfs_buf_get_noaddr(
+xfs_buf_get_uncached(
+        struct xfs_buftarg      *target,
        size_t                  len,
-        xfs_buftarg_t           *target)
+        int                     flags)
 {
        unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
        int                     error, i;
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr(
                goto fail_free_buf;
        for (i = 0; i < page_count; i++) {
-                bp->b_pages[i] = alloc_page(GFP_KERNEL);
+                bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
                if (!bp->b_pages[i])
                        goto fail_free_mem;
        }
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr(
        xfs_buf_unlock(bp);
-        trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+        trace_xfs_buf_get_uncached(bp, _RET_IP_);
        return bp;
 fail_free_mem:
@@ -774,29 +835,30 @@ void
 xfs_buf_rele(
        xfs_buf_t               *bp)
 {
-        xfs_bufhash_t           *hash = bp->b_hash;
+        struct xfs_perag        *pag = bp->b_pag;
        trace_xfs_buf_rele(bp, _RET_IP_);
-        if (unlikely(!hash)) {
+        if (!pag) {
                ASSERT(!bp->b_relse);
+                ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                if (atomic_dec_and_test(&bp->b_hold))
                        xfs_buf_free(bp);
                return;
        }
+        ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
        ASSERT(atomic_read(&bp->b_hold) > 0);
-        if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+        if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
                if (bp->b_relse) {
                        atomic_inc(&bp->b_hold);
-                        spin_unlock(&hash->bh_lock);
+                        spin_unlock(&pag->pag_buf_lock);
-                        (*(bp->b_relse)) (bp);
+                        bp->b_relse(bp);
-                } else if (bp->b_flags & XBF_FS_MANAGED) {
-                        spin_unlock(&hash->bh_lock);
                } else {
                        ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                        list_del_init(&bp->b_hash_list);
+                        rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
-                        spin_unlock(&hash->bh_lock);
+                        spin_unlock(&pag->pag_buf_lock);
+                        xfs_perag_put(pag);
                        xfs_buf_free(bp);
                }
        }
@@ -859,7 +921,7 @@ xfs_buf_lock(
        trace_xfs_buf_lock(bp, _RET_IP_);
        if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-                xfs_log_force(bp->b_mount, 0);
+                xfs_log_force(bp->b_target->bt_mount, 0);
        if (atomic_read(&bp->b_io_remaining))
                blk_run_address_space(bp->b_target->bt_mapping);
        down(&bp->b_sema);
@@ -970,7 +1032,6 @@ xfs_bwrite(
 {
        int                     error;
-        bp->b_mount = mp;
        bp->b_flags |= XBF_WRITE;
        bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
@@ -991,8 +1052,6 @@ xfs_bdwrite(
 {
        trace_xfs_buf_bdwrite(bp, _RET_IP_);
-        bp->b_mount = mp;
        bp->b_flags &= ~XBF_READ;
        bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
@@ -1001,7 +1060,7 @@ xfs_bdwrite(
 /*
 * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
 * so that the proper iodone callbacks get called.
 */
 STATIC int
@@ -1018,21 +1077,21 @@ xfs_bioerror(
        XFS_BUF_ERROR(bp, EIO);
        /*
-         * We're calling biodone, so delete XBF_DONE flag.
+         * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
         */
        XFS_BUF_UNREAD(bp);
        XFS_BUF_UNDELAYWRITE(bp);
        XFS_BUF_UNDONE(bp);
        XFS_BUF_STALE(bp);
-        xfs_biodone(bp);
+        xfs_buf_ioend(bp, 0);
        return EIO;
 }
 /*
 * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
 * This is meant for userdata errors; metadata bufs come with
 * iodone functions attached, so that we can track down errors.
 */
@@ -1081,7 +1140,7 @@ int
 xfs_bdstrat_cb(
        struct xfs_buf  *bp)
 {
-        if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+        if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
                trace_xfs_bdstrat_shut(bp, _RET_IP_);
                /*
                 * Metadata write that didn't get logged but
@@ -1387,62 +1446,24 @@ xfs_buf_iomove(
 */
 void
 xfs_wait_buftarg(
-        xfs_buftarg_t   *btp)
+        struct xfs_buftarg      *btp)
-{
-        xfs_buf_t       *bp, *n;
-        xfs_bufhash_t   *hash;
-        uint            i;
-        for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-                hash = &btp->bt_hash[i];
-again:
-                spin_lock(&hash->bh_lock);
-                list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                        ASSERT(btp == bp->b_target);
-                        if (!(bp->b_flags & XBF_FS_MANAGED)) {
-                                spin_unlock(&hash->bh_lock);
-                                /*
-                                 * Catch superblock reference count leaks
-                                 * immediately
-                                 */
-                                BUG_ON(bp->b_bn == 0);
-                                delay(100);
-                                goto again;
-                        }
-                }
-                spin_unlock(&hash->bh_lock);
-        }
-}
-/*
- *      Allocate buffer hash table for a given target.
- *      For devices containing metadata (i.e. not the log/realtime devices)
- *      we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
-        xfs_buftarg_t           *btp,
-        int                     external)
 {
-        unsigned int            i;
+        struct xfs_perag        *pag;
+        uint                    i;
-        btp->bt_hashshift = external ? 3 : 12;  /* 8 or 4096 buckets */
+        for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
-        btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
+                pag = xfs_perag_get(btp->bt_mount, i);
-                                         sizeof(xfs_bufhash_t));
+                spin_lock(&pag->pag_buf_lock);
-        for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+                while (rb_first(&pag->pag_buf_tree)) {
-                spin_lock_init(&btp->bt_hash[i].bh_lock);
+                        spin_unlock(&pag->pag_buf_lock);
-                INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+                        delay(100);
+                        spin_lock(&pag->pag_buf_lock);
+                }
+                spin_unlock(&pag->pag_buf_lock);
+                xfs_perag_put(pag);
        }
 }
-STATIC void
-xfs_free_bufhash(
-        xfs_buftarg_t           *btp)
-{
-        kmem_free_large(btp->bt_hash);
-        btp->bt_hash = NULL;
-}
 /*
 *      buftarg list for delwrite queue processing
 */
@@ -1475,7 +1496,6 @@ xfs_free_buftarg(
        xfs_flush_buftarg(btp, 1);
        if (mp->m_flags & XFS_MOUNT_BARRIER)
                xfs_blkdev_issue_flush(btp);
-        xfs_free_bufhash(btp);
        iput(btp->bt_mapping->host);
        /* Unregister the buftarg first so that we don't get a
@@ -1597,6 +1617,7 @@ out_error:
 xfs_buftarg_t *
 xfs_alloc_buftarg(
+        struct xfs_mount        *mp,
        struct block_device     *bdev,
        int                     external,
        const char              *fsname)
@@ -1605,6 +1626,7 @@ xfs_alloc_buftarg(
        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+        btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
        btp->bt_bdev = bdev;
        if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1613,7 +1635,6 @@ xfs_alloc_buftarg(
                goto error;
        if (xfs_alloc_delwrite_queue(btp, fsname))
                goto error;
-        xfs_alloc_bufhash(btp, external);
        return btp;
 error:
@@ -1904,7 +1925,7 @@ xfs_flush_buftarg(
                        bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
                        list_del_init(&bp->b_list);
-                        xfs_iowait(bp);
+                        xfs_buf_iowait(bp);
                        xfs_buf_relse(bp);
                }
        }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9d021c73ea52..383a3f37cf98 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -51,7 +51,6 @@ typedef enum {
 #define XBF_DONE        (1 << 5) /* all pages in the buffer uptodate */
 #define XBF_DELWRI      (1 << 6) /* buffer has dirty pages */
 #define XBF_STALE       (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_FS_MANAGED  (1 << 8) /* filesystem controls freeing memory */
 #define XBF_ORDERED     (1 << 11)/* use ordered writes */
 #define XBF_READ_AHEAD  (1 << 12)/* asynchronous read-ahead */
 #define XBF_LOG_BUFFER  (1 << 13)/* this is a buffer used for the log */
@@ -96,7 +95,6 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_DONE,             "DONE" }, \
        { XBF_DELWRI,           "DELWRI" }, \
        { XBF_STALE,            "STALE" }, \
-        { XBF_FS_MANAGED,       "FS_MANAGED" }, \
        { XBF_ORDERED,          "ORDERED" }, \
        { XBF_READ_AHEAD,       "READ_AHEAD" }, \
        { XBF_LOCK,             "LOCK" },       /* should never be set */\
@@ -123,14 +121,11 @@ typedef struct xfs_buftarg {
        dev_t                   bt_dev;
        struct block_device     *bt_bdev;
        struct address_space    *bt_mapping;
+        struct xfs_mount        *bt_mount;
        unsigned int            bt_bsize;
        unsigned int            bt_sshift;
        size_t                  bt_smask;
-        /* per device buffer hash table */
-        uint                    bt_hashshift;
-        xfs_bufhash_t           *bt_hash;
        /* per device delwri queue */
        struct task_struct      *bt_task;
        struct list_head        bt_list;
@@ -158,34 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
 #define XB_PAGES        2
 typedef struct xfs_buf {
+        /*
+         * first cacheline holds all the fields needed for an uncontended cache
+         * hit to be fully processed. The semaphore straddles the cacheline
+         * boundary, but the counter and lock sits on the first cacheline,
+         * which is the only bit that is touched if we hit the semaphore
+         * fast-path on locking.
+         */
+        struct rb_node          b_rbnode;       /* rbtree node */
+        xfs_off_t               b_file_offset;  /* offset in file */
+        size_t                  b_buffer_length;/* size of buffer in bytes */
+        atomic_t                b_hold;         /* reference count */
+        xfs_buf_flags_t         b_flags;        /* status flags */
        struct semaphore        b_sema;         /* semaphore for lockables */
-        unsigned long           b_queuetime;    /* time buffer was queued */
-        atomic_t                b_pin_count;    /* pin count */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
-        xfs_buf_flags_t         b_flags;        /* status flags */
+        struct xfs_perag        *b_pag;         /* contains rbtree root */
-        struct list_head        b_hash_list;    /* hash table list */
-        xfs_bufhash_t           *b_hash;        /* hash table list start */
        xfs_buftarg_t           *b_target;      /* buffer target (device) */
-        atomic_t                b_hold;         /* reference count */
        xfs_daddr_t             b_bn;           /* block number for I/O */
-        xfs_off_t               b_file_offset;  /* offset in file */
-        size_t                  b_buffer_length;/* size of buffer in bytes */
        size_t                  b_count_desired;/* desired transfer size */
        void                    *b_addr;        /* virtual address of buffer */
        struct work_struct      b_iodone_work;
-        atomic_t                b_io_remaining; /* #outstanding I/O requests */
        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
        xfs_buf_relse_t         b_relse;        /* releasing function */
        struct completion       b_iowait;       /* queue for I/O waiters */
        void                    *b_fspriv;
        void                    *b_fspriv2;
-        struct xfs_mount        *b_mount;
-        unsigned short          b_error;        /* error code on I/O */
-        unsigned int            b_page_count;   /* size of page array */
-        unsigned int            b_offset;       /* page offset in first page */
        struct page             **b_pages;      /* array of page pointers */
        struct page             *b_page_array[XB_PAGES]; /* inline pages */
+        unsigned long           b_queuetime;    /* time buffer was queued */
+        atomic_t                b_pin_count;    /* pin count */
+        atomic_t                b_io_remaining; /* #outstanding I/O requests */
+        unsigned int            b_page_count;   /* size of page array */
+        unsigned int            b_offset;       /* page offset in first page */
+        unsigned short          b_error;        /* error code on I/O */
 #ifdef XFS_BUF_LOCK_TRACKING
        int                     b_last_holder;
 #endif
@@ -204,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
                                xfs_buf_flags_t);
 extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
 extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
 extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-                                xfs_buf_flags_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                                struct xfs_buftarg *target,
+                                xfs_daddr_t daddr, size_t length, int flags);
 /* Releasing Buffers */
 extern void xfs_buf_free(xfs_buf_t *);
@@ -233,6 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
 extern int xfs_buf_iowait(xfs_buf_t *);
 extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
                                xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 static inline int xfs_buf_geterror(xfs_buf_t *bp)
 {
@@ -267,8 +273,6 @@ extern void xfs_buf_terminate(void);
                                        XFS_BUF_DONE(bp);       \
                                } while (0)
-#define XFS_BUF_UNMANAGE(bp)    ((bp)->b_flags &= ~XBF_FS_MANAGED)
 #define XFS_BUF_DELAYWRITE(bp)          ((bp)->b_flags |= XBF_DELWRI)
 #define XFS_BUF_UNDELAYWRITE(bp)        xfs_buf_delwri_dequeue(bp)
 #define XFS_BUF_ISDELAYWRITE(bp)        ((bp)->b_flags & XBF_DELWRI)
@@ -347,25 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
        xfs_buf_rele(bp);
 }
-#define xfs_biodone(bp)         xfs_buf_ioend(bp, 0)
-#define xfs_biomove(bp, off, len, data, rw) \
-            xfs_buf_iomove((bp), (off), (len), (data), \
-                ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
-#define xfs_biozero(bp, off, len) \
-            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-#define xfs_iowait(bp)  xfs_buf_iowait(bp)
-#define xfs_baread(target, rablkno, ralen)  \
-        xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
 /*
 *      Handling of buftargs.
 */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                        struct block_device *, int, const char *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index 55bddf3b6091..000000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_CRED_H__
-#define __XFS_CRED_H__
-#include <linux/capability.h>
-/*
- * Credentials
- */
-typedef const struct cred cred_t;
-#endif  /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1f279b012f94..ed88ed16811c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -32,10 +32,9 @@ xfs_tosspages(
        xfs_off_t       last,
        int             fiopt)
 {
-        struct address_space *mapping = VFS_I(ip)->i_mapping;
+        /* can't toss partial tail pages, so mask them out */
+        last &= ~(PAGE_SIZE - 1);
-        if (mapping->nrpages)
+        truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-                truncate_inode_pages(mapping, first);
 }
 int
@@ -50,12 +49,11 @@ xfs_flushinval_pages(
        trace_xfs_pagecache_inval(ip, first, last);
-        if (mapping->nrpages) {
+        xfs_iflags_clear(ip, XFS_ITRUNCATED);
-                xfs_iflags_clear(ip, XFS_ITRUNCATED);
+        ret = filemap_write_and_wait_range(mapping, first,
-                ret = filemap_write_and_wait(mapping);
+                                last == -1 ? LLONG_MAX : last);
-                if (!ret)
+        if (!ret)
-                        truncate_inode_pages(mapping, first);
+                truncate_inode_pages_range(mapping, first, last);
-        }
        return -ret;
 }
@@ -71,10 +69,9 @@ xfs_flush_pages(
        int             ret = 0;
        int             ret2;
-        if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+        xfs_iflags_clear(ip, XFS_ITRUNCATED);
-                xfs_iflags_clear(ip, XFS_ITRUNCATED);
+        ret = -filemap_fdatawrite_range(mapping, first,
-                ret = -filemap_fdatawrite(mapping);
+                                last == -1 ? LLONG_MAX : last);
-        }
        if (flags & XBF_ASYNC)
                return ret;
        ret2 = xfs_wait_on_pages(ip, first, last);
@@ -91,7 +88,9 @@ xfs_wait_on_pages(
 {
        struct address_space *mapping = VFS_I(ip)->i_mapping;
-        if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+        if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
-                return -filemap_fdatawait(mapping);
+                return -filemap_fdatawait_range(mapping, first,
+                                        last == -1 ? ip->i_size - 1 : last);
+        }
        return 0;
 }
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 2ae8b1ccb02e..76e81cff70b9 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #include "xfs.h"
-#include "xfs_cred.h"
 #include "xfs_sysctl.h"
 /*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index 69f71caf061c..000000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_GLOBALS_H__
-#define __XFS_GLOBALS_H__
-extern uint64_t xfs_panic_mask;         /* set to cause more panics */
-#endif  /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 3b9e626f7cd1..2ea238f6d38e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -790,7 +790,7 @@ xfs_ioc_fsgetxattr(
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        fa.fsx_xflags = xfs_ip2xflags(ip);
        fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-        fa.fsx_projid = ip->i_d.di_projid;
+        fa.fsx_projid = xfs_get_projid(ip);
        if (attr) {
                if (ip->i_afp) {
@@ -909,10 +909,10 @@ xfs_ioctl_setattr(
                return XFS_ERROR(EIO);
        /*
-         * Disallow 32bit project ids because on-disk structure
+         * Disallow 32bit project ids when projid32bit feature is not enabled.
-         * is 16bit only.
         */
-        if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+        if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+                        !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
                return XFS_ERROR(EINVAL);
        /*
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
        if (mask & FSX_PROJID) {
                if (XFS_IS_QUOTA_RUNNING(mp) &&
                    XFS_IS_PQUOTA_ON(mp) &&
-                    ip->i_d.di_projid != fa->fsx_projid) {
+                    xfs_get_projid(ip) != fa->fsx_projid) {
                        ASSERT(tp);
                        code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
                                                capable(CAP_FOWNER) ?
@@ -1063,12 +1063,12 @@ xfs_ioctl_setattr(
                 * Change the ownerships and register quota modifications
                 * in the transaction.
                 */
-                if (ip->i_d.di_projid != fa->fsx_projid) {
+                if (xfs_get_projid(ip) != fa->fsx_projid) {
                        if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
                                olddquot = xfs_qm_vop_chown(tp, ip,
                                                        &ip->i_gdquot, gdqp);
                        }
-                        ip->i_d.di_projid = fa->fsx_projid;
+                        xfs_set_projid(ip, fa->fsx_projid);
                        /*
                         * We may have to rev the inode as well as
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
                xfs_diflags_to_linux(ip);
        }
+        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-        xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
        XFS_STATS_INC(xs_ig_attrchg);
@@ -1301,7 +1301,8 @@ xfs_file_ioctl(
        case XFS_IOC_ALLOCSP64:
        case XFS_IOC_FREESP64:
        case XFS_IOC_RESVSP64:
-        case XFS_IOC_UNRESVSP64: {
+        case XFS_IOC_UNRESVSP64:
+        case XFS_IOC_ZERO_RANGE: {
                xfs_flock64_t           bf;
                if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 6c83f7f62dc9..b3486dfa5520 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
            get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
            get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
            get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
-            get_user(bstat->bs_projid,  &bstat32->bs_projid)    ||
+            get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+            get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
            get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
            get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
            get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
            put_user(buffer->bs_extents,  &p32->bs_extents)     ||
            put_user(buffer->bs_gen,      &p32->bs_gen)         ||
            put_user(buffer->bs_projid,   &p32->bs_projid)      ||
+            put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
            put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
            put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
            put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl(
        case XFS_IOC_FSGEOMETRY_V1:
        case XFS_IOC_FSGROWFSDATA:
        case XFS_IOC_FSGROWFSRT:
+        case XFS_IOC_ZERO_RANGE:
                return xfs_file_ioctl(filp, cmd, p);
 #else
        case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 1024c4f8ba0d..08b605792a99 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
        __s32           bs_extsize;     /* extent size                  */
        __s32           bs_extents;     /* number of extents            */
        __u32           bs_gen;         /* generation count             */
-        __u16           bs_projid;      /* project id                   */
+        __u16           bs_projid_lo;   /* lower part of project id     */
-        unsigned char   bs_pad[14];     /* pad space, unused            */
+#define bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
+        __u16           bs_projid_hi;   /* high part of project id      */
+        unsigned char   bs_pad[12];     /* pad space, unused            */
        __u32           bs_dmevmask;    /* DMIG event mask              */
        __u16           bs_dmstate;     /* DMIG state info              */
        __u16           bs_aextents;    /* attribute number of extents  */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index b1fc2a6bfe83..ec858e09d546 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -95,41 +95,6 @@ xfs_mark_inode_dirty(
 }
 /*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- */
-void
-xfs_ichgtime(
-        xfs_inode_t     *ip,
-        int             flags)
-{
-        struct inode    *inode = VFS_I(ip);
-        timespec_t      tv;
-        int             sync_it = 0;
-        tv = current_fs_time(inode->i_sb);
-        if ((flags & XFS_ICHGTIME_MOD) &&
-            !timespec_equal(&inode->i_mtime, &tv)) {
-                inode->i_mtime = tv;
-                sync_it = 1;
-        }
-        if ((flags & XFS_ICHGTIME_CHG) &&
-            !timespec_equal(&inode->i_ctime, &tv)) {
-                inode->i_ctime = tv;
-                sync_it = 1;
-        }
-        /*
-         * Update complete - now make sure everyone knows that the inode
-         * is dirty.
-         */
-        if (sync_it)
-                xfs_mark_inode_dirty_sync(ip);
-}
-/*
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * these attrs can be journalled at inode creation time (along with the
@@ -224,7 +189,7 @@ xfs_vn_mknod(
        }
        xfs_dentry_to_name(&name, dentry);
-        error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
+        error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
        if (unlikely(error))
                goto out_free_acl;
@@ -397,7 +362,7 @@ xfs_vn_symlink(
                (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
        xfs_dentry_to_name(&name, dentry);
-        error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
+        error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
        if (unlikely(error))
                goto out;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 2fa0bd9ebc7f..214ddd71ff79 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
 #include <linux/random.h>
 #include <linux/ctype.h>
 #include <linux/writeback.h>
+#include <linux/capability.h>
 #include <asm/page.h>
 #include <asm/div64.h>
@@ -79,14 +80,12 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
-#include <xfs_cred.h>
 #include <xfs_vnode.h>
 #include <xfs_stats.h>
 #include <xfs_sysctl.h>
 #include <xfs_iops.h>
 #include <xfs_aops.h>
 #include <xfs_super.h>
-#include <xfs_globals.h>
 #include <xfs_buf.h>
 /*
@@ -144,7 +143,7 @@
 #define SYNCHRONIZE()   barrier()
 #define __return_address __builtin_return_address(0)
-#define dfltprid        0
+#define XFS_PROJID_DEFAULT      0
 #define MAXPATHLEN      1024
 #define MIN(a,b)        (min(a,b))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 08fd3102128c..ab31ce5aeaf9 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -44,7 +44,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
-#include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
 #include "xfs_filestream.h"
@@ -645,7 +644,7 @@ xfs_barrier_test(
        XFS_BUF_ORDERED(sbp);
        xfsbdstrat(mp, sbp);
-        error = xfs_iowait(sbp);
+        error = xfs_buf_iowait(sbp);
        /*
         * Clear all the flags we set and possible error state in the
@@ -757,18 +756,20 @@ xfs_open_devices(
         * Setup xfs_mount buffer target pointers
         */
        error = ENOMEM;
-        mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+        mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
        if (!mp->m_ddev_targp)
                goto out_close_rtdev;
        if (rtdev) {
-                mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+                mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                        mp->m_fsname);
                if (!mp->m_rtdev_targp)
                        goto out_free_ddev_targ;
        }
        if (logdev && logdev != ddev) {
-                mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+                mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                        mp->m_fsname);
                if (!mp->m_logdev_targp)
                        goto out_free_rtdev_targ;
        } else {
@@ -971,12 +972,7 @@ xfs_fs_inode_init_once(
 /*
 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode. Care must be taken
+ * we catch unlogged VFS level updates to the inode.
- * here - the transaction code calls mark_inode_dirty_sync() to mark the
- * VFS inode dirty in a transaction and clears the i_update_core field;
- * it must clear the field after calling mark_inode_dirty_sync() to
- * correctly indicate that the dirty state has been propagated into the
- * inode log item.
 *
 * We need the barrier() to maintain correct ordering between unlogged
 * updates and the transaction commit code that clears the i_update_core
@@ -1520,8 +1516,9 @@ xfs_fs_fill_super(
        if (error)
                goto out_free_fsname;
-        if (xfs_icsb_init_counters(mp))
+        error = xfs_icsb_init_counters(mp);
-                mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+        if (error)
+                goto out_close_devices;
        error = xfs_readsb(mp, flags);
        if (error)
@@ -1582,6 +1579,7 @@ xfs_fs_fill_super(
        xfs_freesb(mp);
 out_destroy_counters:
        xfs_icsb_destroy_counters(mp);
+ out_close_devices:
        xfs_close_devices(mp);
 out_free_fsname:
        xfs_free_fsname(mp);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 1ef4a4d2d997..50a3266c999e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void);
 # define XFS_DBG_STRING         "no debug"
 #endif
+#define XFS_VERSION_STRING      "SGI XFS"
 #define XFS_BUILD_OPTIONS       XFS_ACL_STRING \
                                XFS_SECURITY_STRING \
                                XFS_REALTIME_STRING \
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 81976ffed7d6..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -39,42 +39,39 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH        32
-STATIC xfs_inode_t *
+STATIC int
-xfs_inode_ag_lookup(
+xfs_inode_ag_walk_grab(
-        struct xfs_mount        *mp,
+        struct xfs_inode        *ip)
-        struct xfs_perag        *pag,
-        uint32_t                *first_index,
-        int                     tag)
 {
-        int                     nr_found;
+        struct inode            *inode = VFS_I(ip);
-        struct xfs_inode        *ip;
-        /*
+        /* nothing to sync during shutdown */
-         * use a gang lookup to find the next inode in the tree
+        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-         * as the tree is sparse and a gang lookup walks to find
+                return EFSCORRUPTED;
-         * the number of objects requested.
-         */
+        /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-        if (tag == XFS_ICI_NO_TAG) {
+        if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-                nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                return ENOENT;
-                                (void **)&ip, *first_index, 1);
-        } else {
+        /* If we can't grab the inode, it must be on its way to reclaim. */
-                nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+        if (!igrab(inode))
-                                (void **)&ip, *first_index, 1, tag);
+                return ENOENT;
+        if (is_bad_inode(inode)) {
+                IRELE(ip);
+                return ENOENT;
        }
-        if (!nr_found)
-                return NULL;
-        /*
+        /* inode is valid */
-         * Update the index for the next lookup. Catch overflows
+        return 0;
-         * into the next AG range which can occur if we have inodes
-         * in the last block of the AG and we are currently
-         * pointing to the last inode.
-         */
-        *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-        if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                return NULL;
-        return ip;
 }
 STATIC int
@@ -83,49 +80,75 @@ xfs_inode_ag_walk(
        struct xfs_perag        *pag,
        int                     (*execute)(struct xfs_inode *ip,
                                           struct xfs_perag *pag, int flags),
-        int                     flags,
+        int                     flags)
-        int                     tag,
-        int                     exclusive,
-        int                     *nr_to_scan)
 {
        uint32_t                first_index;
        int                     last_error = 0;
        int                     skipped;
+        int                     done;
+        int                     nr_found;
 restart:
+        done = 0;
        skipped = 0;
        first_index = 0;
+        nr_found = 0;
        do {
+                struct xfs_inode *batch[XFS_LOOKUP_BATCH];
                int             error = 0;
-                xfs_inode_t     *ip;
+                int             i;
-                if (exclusive)
+                read_lock(&pag->pag_ici_lock);
-                        write_lock(&pag->pag_ici_lock);
+                nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                else
+                                        (void **)batch, first_index,
-                        read_lock(&pag->pag_ici_lock);
+                                        XFS_LOOKUP_BATCH);
-                ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+                if (!nr_found) {
-                if (!ip) {
+                        read_unlock(&pag->pag_ici_lock);
-                        if (exclusive)
-                                write_unlock(&pag->pag_ici_lock);
-                        else
-                                read_unlock(&pag->pag_ici_lock);
                        break;
                }
-                /* execute releases pag->pag_ici_lock */
+                /*
-                error = execute(ip, pag, flags);
+                 * Grab the inodes before we drop the lock. if we found
-                if (error == EAGAIN) {
+                 * nothing, nr == 0 and the loop will be skipped.
-                        skipped++;
+                 */
-                        continue;
+                for (i = 0; i < nr_found; i++) {
+                        struct xfs_inode *ip = batch[i];
+                        if (done || xfs_inode_ag_walk_grab(ip))
+                                batch[i] = NULL;
+                        /*
+                         * Update the index for the next lookup. Catch overflows
+                         * into the next AG range which can occur if we have inodes
+                         * in the last block of the AG and we are currently
+                         * pointing to the last inode.
+                         */
+                        first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                        if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                done = 1;
+                }
+                /* unlock now we've grabbed the inodes. */
+                read_unlock(&pag->pag_ici_lock);
+                for (i = 0; i < nr_found; i++) {
+                        if (!batch[i])
+                                continue;
+                        error = execute(batch[i], pag, flags);
+                        IRELE(batch[i]);
+                        if (error == EAGAIN) {
+                                skipped++;
+                                continue;
+                        }
+                        if (error && last_error != EFSCORRUPTED)
+                                last_error = error;
                }
-                if (error)
-                        last_error = error;
                /* bail out if the filesystem is corrupted.  */
                if (error == EFSCORRUPTED)
                        break;
-        } while ((*nr_to_scan)--);
+        } while (nr_found && !done);
        if (skipped) {
                delay(1);
@@ -134,110 +157,32 @@ restart:
        return last_error;
 }
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
-        struct xfs_mount        *mp,
-        xfs_agnumber_t          *first,
-        int                     tag)
-{
-        struct xfs_perag        *pag = NULL;
-        if (tag == XFS_ICI_RECLAIM_TAG) {
-                int found;
-                int ref;
-                spin_lock(&mp->m_perag_lock);
-                found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
-                                (void **)&pag, *first, 1, tag);
-                if (found <= 0) {
-                        spin_unlock(&mp->m_perag_lock);
-                        return NULL;
-                }
-                *first = pag->pag_agno + 1;
-                /* open coded pag reference increment */
-                ref = atomic_inc_return(&pag->pag_ref);
-                spin_unlock(&mp->m_perag_lock);
-                trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
-        } else {
-                pag = xfs_perag_get(mp, *first);
-                (*first)++;
-        }
-        return pag;
-}
 int
 xfs_inode_ag_iterator(
        struct xfs_mount        *mp,
        int                     (*execute)(struct xfs_inode *ip,
                                           struct xfs_perag *pag, int flags),
-        int                     flags,
+        int                     flags)
-        int                     tag,
-        int                     exclusive,
-        int                     *nr_to_scan)
 {
        struct xfs_perag        *pag;
        int                     error = 0;
        int                     last_error = 0;
        xfs_agnumber_t          ag;
-        int                     nr;
-        nr = nr_to_scan ? *nr_to_scan : INT_MAX;
        ag = 0;
-        while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
+        while ((pag = xfs_perag_get(mp, ag))) {
-                error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
+                ag = pag->pag_agno + 1;
-                                                exclusive, &nr);
+                error = xfs_inode_ag_walk(mp, pag, execute, flags);
                xfs_perag_put(pag);
                if (error) {
                        last_error = error;
                        if (error == EFSCORRUPTED)
                                break;
                }
-                if (nr <= 0)
-                        break;
        }
-        if (nr_to_scan)
-                *nr_to_scan = nr;
        return XFS_ERROR(last_error);
 }
-/* must be called with pag_ici_lock held and releases it */
-int
-xfs_sync_inode_valid(
-        struct xfs_inode        *ip,
-        struct xfs_perag        *pag)
-{
-        struct inode            *inode = VFS_I(ip);
-        int                     error = EFSCORRUPTED;
-        /* nothing to sync during shutdown */
-        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-                goto out_unlock;
-        /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-        error = ENOENT;
-        if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-                goto out_unlock;
-        /* If we can't grab the inode, it must on it's way to reclaim. */
-        if (!igrab(inode))
-                goto out_unlock;
-        if (is_bad_inode(inode)) {
-                IRELE(ip);
-                goto out_unlock;
-        }
-        /* inode is valid */
-        error = 0;
-out_unlock:
-        read_unlock(&pag->pag_ici_lock);
-        return error;
-}
 STATIC int
 xfs_sync_inode_data(
        struct xfs_inode        *ip,
@@ -248,10 +193,6 @@ xfs_sync_inode_data(
        struct address_space *mapping = inode->i_mapping;
        int                     error = 0;
-        error = xfs_sync_inode_valid(ip, pag);
-        if (error)
-                return error;
        if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                goto out_wait;
@@ -268,7 +209,6 @@ xfs_sync_inode_data(
 out_wait:
        if (flags & SYNC_WAIT)
                xfs_ioend_wait(ip);
-        IRELE(ip);
        return error;
 }
@@ -280,10 +220,6 @@ xfs_sync_inode_attr(
 {
        int                     error = 0;
-        error = xfs_sync_inode_valid(ip, pag);
-        if (error)
-                return error;
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        if (xfs_inode_clean(ip))
                goto out_unlock;
@@ -302,7 +238,6 @@ xfs_sync_inode_attr(
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-        IRELE(ip);
        return error;
 }
@@ -318,8 +253,7 @@ xfs_sync_data(
        ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-        error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+        error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-                                      XFS_ICI_NO_TAG, 0, NULL);
        if (error)
                return XFS_ERROR(error);
@@ -337,8 +271,7 @@ xfs_sync_attr(
 {
        ASSERT((flags & ~SYNC_WAIT) == 0);
-        return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+        return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-                                     XFS_ICI_NO_TAG, 0, NULL);
 }
 STATIC int
@@ -698,6 +631,43 @@ __xfs_inode_clear_reclaim_tag(
 }
 /*
+ * Grab the inode for reclaim exclusively.
+ *
+ * On success the XFS_IRECLAIM flag has been set on the inode under
+ * ip->i_flags_lock, which marks it as owned by this reclaimer. When
+ * SYNC_TRYLOCK is set in flags, inodes that look busy are rejected up
+ * front without taking i_flags_lock at all.
+ *
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+        struct xfs_inode        *ip,
+        int                     flags)
+{
+        /*
+         * Do some unlocked checks first to avoid unnecessary lock traffic.
+         * The first is a flush lock check, the second is an already in
+         * reclaim check. Only do these checks if we are not going to block
+         * on locks. They are advisory only: the XFS_IRECLAIM test is
+         * repeated under i_flags_lock below.
+         */
+        if ((flags & SYNC_TRYLOCK) &&
+            (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+                return 1;
+        }
+        /*
+         * The radix tree lock here (pag_ici_lock, held by the caller
+         * xfs_reclaim_inodes_ag across this call) protects a thread in
+         * xfs_iget from racing with us starting reclaim on the inode.
+         * Once we have the XFS_IRECLAIM flag set it will not touch us.
+         */
+        spin_lock(&ip->i_flags_lock);
+        ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+        if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+                /* ignore as it is already under reclaim by someone else */
+                spin_unlock(&ip->i_flags_lock);
+                return 1;
+        }
+        __xfs_iflags_set(ip, XFS_IRECLAIM);
+        spin_unlock(&ip->i_flags_lock);
+        return 0;
+}
+/*
 * Inodes in different states need to be treated differently, and the return
 * value of xfs_iflush is not sufficient to get this right. The following table
 * lists the inode states and the reclaim actions necessary for non-blocking
@@ -755,23 +725,6 @@ xfs_reclaim_inode(
 {
        int     error = 0;
-        /*
-         * The radix tree lock here protects a thread in xfs_iget from racing
-         * with us starting reclaim on the inode.  Once we have the
-         * XFS_IRECLAIM flag set it will not touch us.
-         */
-        spin_lock(&ip->i_flags_lock);
-        ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-        if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
-                /* ignore as it is already under reclaim */
-                spin_unlock(&ip->i_flags_lock);
-                write_unlock(&pag->pag_ici_lock);
-                return 0;
-        }
-        __xfs_iflags_set(ip, XFS_IRECLAIM);
-        spin_unlock(&ip->i_flags_lock);
-        write_unlock(&pag->pag_ici_lock);
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        if (!xfs_iflock_nowait(ip)) {
                if (!(sync_mode & SYNC_WAIT))
@@ -868,13 +821,126 @@ reclaim:
 }
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during the filesystem unmount reclaim walk will leak all
+ * the unreclaimed inodes.
+ *
+ * mp:          mount whose reclaim-tagged AGs are walked
+ * flags:       SYNC_* flags handed on to xfs_reclaim_inode(); with
+ *              SYNC_TRYLOCK set, AGs whose reclaim lock is contended are
+ *              skipped and the walk resumes from the per-AG reclaim cursor
+ * nr_to_scan:  in/out scan budget, reduced by XFS_LOOKUP_BATCH per lookup
+ *
+ * Returns the most recent reclaim error via XFS_ERROR(); once EFSCORRUPTED
+ * has been recorded it is not overwritten by later errors.
+ */
+int
+xfs_reclaim_inodes_ag(
+        struct xfs_mount        *mp,
+        int                     flags,
+        int                     *nr_to_scan)
+{
+        struct xfs_perag        *pag;
+        int                     error = 0;
+        int                     last_error = 0;
+        xfs_agnumber_t          ag;
+        int                     trylock = flags & SYNC_TRYLOCK;
+        int                     skipped;
+restart:
+        ag = 0;
+        skipped = 0;
+        while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+                unsigned long   first_index = 0;
+                int             done = 0;
+                int             nr_found = 0;
+                ag = pag->pag_agno + 1;
+                if (trylock) {
+                        if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                                skipped++;
+                                /*
+                                 * Drop the reference taken by
+                                 * xfs_perag_get_tag() before moving on,
+                                 * otherwise every contended AG leaks a
+                                 * perag reference.
+                                 */
+                                xfs_perag_put(pag);
+                                continue;
+                        }
+                        first_index = pag->pag_ici_reclaim_cursor;
+                } else
+                        mutex_lock(&pag->pag_ici_reclaim_lock);
+                do {
+                        struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                        int     i;
+                        write_lock(&pag->pag_ici_lock);
+                        nr_found = radix_tree_gang_lookup_tag(
+                                        &pag->pag_ici_root,
+                                        (void **)batch, first_index,
+                                        XFS_LOOKUP_BATCH,
+                                        XFS_ICI_RECLAIM_TAG);
+                        if (!nr_found) {
+                                write_unlock(&pag->pag_ici_lock);
+                                break;
+                        }
+                        /*
+                         * Grab the inodes before we drop the lock. Inodes
+                         * that cannot be grabbed, or that follow an index
+                         * wrap, are cleared from the batch so that the
+                         * reclaim pass below skips them.
+                         */
+                        for (i = 0; i < nr_found; i++) {
+                                struct xfs_inode *ip = batch[i];
+                                if (done || xfs_reclaim_inode_grab(ip, flags))
+                                        batch[i] = NULL;
+                                /*
+                                 * Update the index for the next lookup. Catch
+                                 * overflows into the next AG range which can
+                                 * occur if we have inodes in the last block of
+                                 * the AG and we are currently pointing to the
+                                 * last inode.
+                                 */
+                                first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                                if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                        done = 1;
+                        }
+                        /* unlock now we've grabbed the inodes. */
+                        write_unlock(&pag->pag_ici_lock);
+                        for (i = 0; i < nr_found; i++) {
+                                if (!batch[i])
+                                        continue;
+                                error = xfs_reclaim_inode(batch[i], pag, flags);
+                                /* keep a recorded EFSCORRUPTED sticky */
+                                if (error && last_error != EFSCORRUPTED)
+                                        last_error = error;
+                        }
+                        *nr_to_scan -= XFS_LOOKUP_BATCH;
+                } while (nr_found && !done && *nr_to_scan > 0);
+                /*
+                 * A non-blocking walk that stopped early remembers where it
+                 * got to; any other outcome resets the cursor.
+                 */
+                if (trylock && !done)
+                        pag->pag_ici_reclaim_cursor = first_index;
+                else
+                        pag->pag_ici_reclaim_cursor = 0;
+                mutex_unlock(&pag->pag_ici_reclaim_lock);
+                xfs_perag_put(pag);
+        }
+        /*
+         * If we skipped any AG, and we still have scan count remaining, do
+         * another pass, this time using blocking reclaim semantics (i.e.
+         * waiting on the reclaim locks and ignoring the reclaim cursors). This
+         * ensures that when we get more reclaimers than AGs we block rather
+         * than spin trying to execute reclaim.
+         */
+        if (trylock && skipped && *nr_to_scan > 0) {
+                trylock = 0;
+                goto restart;
+        }
+        return XFS_ERROR(last_error);
+}
 int
 xfs_reclaim_inodes(
        xfs_mount_t     *mp,
        int             mode)
 {
-        return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
+        int             nr_to_scan = INT_MAX;
-                                        XFS_ICI_RECLAIM_TAG, 1, NULL);
+        return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
 }
 /*
@@ -896,17 +962,16 @@ xfs_reclaim_inode_shrink(
                if (!(gfp_mask & __GFP_FS))
                        return -1;
-                xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
-                                        XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+                /* terminate if we don't exhaust the scan */
-                /* if we don't exhaust the scan, don't bother coming back */
                if (nr_to_scan > 0)
                        return -1;
       }
        reclaimable = 0;
        ag = 0;
-        while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+        while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-                                        XFS_ICI_RECLAIM_TAG))) {
+                ag = pag->pag_agno + 1;
                reclaimable += pag->pag_ici_reclaimable;
                xfs_perag_put(pag);
        }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index fe78726196f8..32ba6628290c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -47,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
                                struct xfs_inode *ip);
-int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_sync_inode_grab(struct xfs_inode *ip);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-        int flags, int tag, int write_lock, int *nr_to_scan);
+        int flags);
 void xfs_inode_shrinker_register(struct xfs_mount *mp);
 void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 8fe311a456e2..acef2e98c594 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name,	\
                 unsigned long caller_ip),                                      \
        TP_ARGS(mp, agno, refcount, caller_ip))
 DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
 DEFINE_PERAG_REF_EVENT(xfs_perag_put);
 DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
 DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -330,7 +330,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
 DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
 DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
 DEFINE_BUF_EVENT(xfs_bdstrat_shut);
 DEFINE_BUF_EVENT(xfs_buf_item_relse);
 DEFINE_BUF_EVENT(xfs_buf_item_iodone);
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
deleted file mode 100644
index f8d279d7563a..000000000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VERSION_H__
-#define __XFS_VERSION_H__
-/*
- * Dummy file that can contain a timestamp to put into the
- * XFS init string, to help users keep track of what they're
- * running
- */
-#define XFS_VERSION_STRING "SGI XFS"
-#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e1a2f6800e01..faf8e1a83a12 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -463,87 +463,68 @@ xfs_qm_dqtobp(
        uint                    flags)
 {
        xfs_bmbt_irec_t map;
-        int             nmaps, error;
+        int             nmaps = 1, error;
        xfs_buf_t       *bp;
-        xfs_inode_t     *quotip;
+        xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
-        xfs_mount_t     *mp;
+        xfs_mount_t     *mp = dqp->q_mount;
        xfs_disk_dquot_t *ddq;
-        xfs_dqid_t      id;
+        xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
-        boolean_t       newdquot;
        xfs_trans_t     *tp = (tpp ? *tpp : NULL);
-        mp = dqp->q_mount;
+        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-        id = be32_to_cpu(dqp->q_core.d_id);
-        nmaps = 1;
-        newdquot = B_FALSE;
-        /*
+        xfs_ilock(quotip, XFS_ILOCK_SHARED);
-         * If we don't know where the dquot lives, find out.
+        if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-         */
-        if (dqp->q_blkno == (xfs_daddr_t) 0) {
-                /* We use the id as an index */
-                dqp->q_fileoffset = (xfs_fileoff_t)id /
-                                        mp->m_quotainfo->qi_dqperchunk;
-                nmaps = 1;
-                quotip = XFS_DQ_TO_QIP(dqp);
-                xfs_ilock(quotip, XFS_ILOCK_SHARED);
                /*
-                 * Return if this type of quotas is turned off while we didn't
+                 * Return if this type of quotas is turned off while we
-                 * have an inode lock
+                 * didn't have the quota inode lock.
                 */
-                if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+                xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-                        xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+                return ESRCH;
-                        return (ESRCH);
+        }
-                }
+        /*
+         * Find the block map; no allocations yet
+         */
+        error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                          XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                          NULL, 0, &map, &nmaps, NULL);
+        xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+        if (error)
+                return error;
+        ASSERT(nmaps == 1);
+        ASSERT(map.br_blockcount == 1);
+        /*
+         * Offset of dquot in the (fixed sized) dquot chunk.
+         */
+        dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+                sizeof(xfs_dqblk_t);
+        ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+        if (map.br_startblock == HOLESTARTBLOCK) {
                /*
-                 * Find the block map; no allocations yet
+                 * We don't allocate unless we're asked to
                 */
-                error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                if (!(flags & XFS_QMOPT_DQALLOC))
-                                  XFS_DQUOT_CLUSTER_SIZE_FSB,
+                        return ENOENT;
-                                  XFS_BMAPI_METADATA,
-                                  NULL, 0, &map, &nmaps, NULL);
-                xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+                ASSERT(tp);
+                error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+                                        dqp->q_fileoffset, &bp);
                if (error)
-                        return (error);
+                        return error;
-                ASSERT(nmaps == 1);
+                tp = *tpp;
-                ASSERT(map.br_blockcount == 1);
+        } else {
+                trace_xfs_dqtobp_read(dqp);
                /*
-                 * offset of dquot in the (fixed sized) dquot chunk.
+                 * store the blkno etc so that we don't have to do the
+                 * mapping all the time
                 */
-                dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+                dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-                        sizeof(xfs_dqblk_t);
-                if (map.br_startblock == HOLESTARTBLOCK) {
-                        /*
-                         * We don't allocate unless we're asked to
-                         */
-                        if (!(flags & XFS_QMOPT_DQALLOC))
-                                return (ENOENT);
-                        ASSERT(tp);
-                        if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-                                                dqp->q_fileoffset, &bp)))
-                                return (error);
-                        tp = *tpp;
-                        newdquot = B_TRUE;
-                } else {
-                        /*
-                         * store the blkno etc so that we don't have to do the
-                         * mapping all the time
-                         */
-                        dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-                }
-        }
-        ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
-        ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
-        /*
-         * Read in the buffer, unless we've just done the allocation
-         * (in which case we already have the buf).
-         */
-        if (!newdquot) {
-                trace_xfs_dqtobp_read(dqp);
                error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                           dqp->q_blkno,
@@ -552,13 +533,14 @@ xfs_qm_dqtobp(
                if (error || !bp)
                        return XFS_ERROR(error);
        }
        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
        /*
         * calculate the location of the dquot inside the buffer.
         */
-        ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+        ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
        /*
         * A simple sanity check in case we got a corrupted dquot...
@@ -1176,18 +1158,18 @@ xfs_qm_dqflush(
        xfs_dquot_t             *dqp,
        uint                    flags)
 {
-        xfs_mount_t             *mp;
+        struct xfs_mount        *mp = dqp->q_mount;
-        xfs_buf_t               *bp;
+        struct xfs_buf          *bp;
-        xfs_disk_dquot_t        *ddqp;
+        struct xfs_disk_dquot   *ddqp;
        int                     error;
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        ASSERT(!completion_done(&dqp->q_flush));
        trace_xfs_dqflush(dqp);
        /*
-         * If not dirty, or it's pinned and we are not supposed to
+         * If not dirty, or it's pinned and we are not supposed to block, nada.
-         * block, nada.
         */
        if (!XFS_DQ_IS_DIRTY(dqp) ||
            (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
@@ -1201,40 +1183,46 @@ xfs_qm_dqflush(
         * down forcibly. If that's the case we must not write this dquot
         * to disk, because the log record didn't make it to disk!
         */
-        if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
+        if (XFS_FORCED_SHUTDOWN(mp)) {
-                dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+                dqp->dq_flags &= ~XFS_DQ_DIRTY;
                xfs_dqfunlock(dqp);
                return XFS_ERROR(EIO);
        }
        /*
         * Get the buffer containing the on-disk dquot
-         * We don't need a transaction envelope because we know that the
-         * the ondisk-dquot has already been allocated for.
         */
-        if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
+        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+                                   mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+        if (error) {
                ASSERT(error != ENOENT);
-                /*
-                 * Quotas could have gotten turned off (ESRCH)
-                 */
                xfs_dqfunlock(dqp);
-                return (error);
+                return error;
        }
-        if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
+        /*
-                           0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
+         * Calculate the location of the dquot inside the buffer.
-                xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
+         */
+        ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+        /*
+         * A simple sanity check in case we got a corrupted dquot..
+         */
+        if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+                           XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
+                xfs_buf_relse(bp);
+                xfs_dqfunlock(dqp);
+                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                return XFS_ERROR(EIO);
        }
        /* This is the only portion of data that needs to persist */
-        memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
+        memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
        /*
         * Clear the dirty field and remember the flush lsn for later use.
         */
-        dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+        dqp->dq_flags &= ~XFS_DQ_DIRTY;
-        mp = dqp->q_mount;
        xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
                                        &dqp->q_logitem.qli_item.li_lsn);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 9a92407109a1..f8e854b4fde8 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -55,8 +55,6 @@ uint		ndquot;
 kmem_zone_t     *qm_dqzone;
 kmem_zone_t     *qm_dqtrxzone;
-static cred_t   xfs_zerocr;
 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -837,7 +835,7 @@ xfs_qm_dqattach_locked(
                        xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
                                                flags & XFS_QMOPT_DQALLOC,
                                                ip->i_udquot, &ip->i_gdquot) :
-                        xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
+                        xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
                                                flags & XFS_QMOPT_DQALLOC,
                                                ip->i_udquot, &ip->i_gdquot);
                /*
@@ -1199,87 +1197,6 @@ xfs_qm_list_destroy(
        mutex_destroy(&(list->qh_lock));
 }
-/*
- * Stripped down version of dqattach. This doesn't attach, or even look at the
- * dquots attached to the inode. The rationale is that there won't be any
- * attached at the time this is called from quotacheck.
- */
-STATIC int
-xfs_qm_dqget_noattach(
-        xfs_inode_t     *ip,
-        xfs_dquot_t     **O_udqpp,
-        xfs_dquot_t     **O_gdqpp)
-{
-        int             error;
-        xfs_mount_t     *mp;
-        xfs_dquot_t     *udqp, *gdqp;
-        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-        mp = ip->i_mount;
-        udqp = NULL;
-        gdqp = NULL;
-        if (XFS_IS_UQUOTA_ON(mp)) {
-                ASSERT(ip->i_udquot == NULL);
-                /*
-                 * We want the dquot allocated if it doesn't exist.
-                 */
-                if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
-                                         XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
-                                         &udqp))) {
-                        /*
-                         * Shouldn't be able to turn off quotas here.
-                         */
-                        ASSERT(error != ESRCH);
-                        ASSERT(error != ENOENT);
-                        return error;
-                }
-                ASSERT(udqp);
-        }
-        if (XFS_IS_OQUOTA_ON(mp)) {
-                ASSERT(ip->i_gdquot == NULL);
-                if (udqp)
-                        xfs_dqunlock(udqp);
-                error = XFS_IS_GQUOTA_ON(mp) ?
-                                xfs_qm_dqget(mp, ip,
-                                             ip->i_d.di_gid, XFS_DQ_GROUP,
-                                             XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-                                             &gdqp) :
-                                xfs_qm_dqget(mp, ip,
-                                             ip->i_d.di_projid, XFS_DQ_PROJ,
-                                             XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-                                             &gdqp);
-                if (error) {
-                        if (udqp)
-                                xfs_qm_dqrele(udqp);
-                        ASSERT(error != ESRCH);
-                        ASSERT(error != ENOENT);
-                        return error;
-                }
-                ASSERT(gdqp);
-                /* Reacquire the locks in the right order */
-                if (udqp) {
-                        if (! xfs_qm_dqlock_nowait(udqp)) {
-                                xfs_dqunlock(gdqp);
-                                xfs_dqlock(udqp);
-                                xfs_dqlock(gdqp);
-                        }
-                }
-        }
-        *O_udqpp = udqp;
-        *O_gdqpp = gdqp;
-#ifdef QUOTADEBUG
-        if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
-        if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
-#endif
-        return 0;
-}
 /*
 * Create an inode and return with a reference already taken, but unlocked
 * This is how we create quota inodes
@@ -1305,8 +1222,8 @@ xfs_qm_qino_alloc(
                return error;
        }
-        if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
+        error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-                                   &xfs_zerocr, 0, 1, ip, &committed))) {
+        if (error) {
                xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT);
                return error;
@@ -1516,7 +1433,7 @@ xfs_qm_dqiterate(
                                rablkcnt =  map[i+1].br_blockcount;
                                rablkno = map[i+1].br_startblock;
                                while (rablkcnt--) {
-                                        xfs_baread(mp->m_ddev_targp,
+                                        xfs_buf_readahead(mp->m_ddev_targp,
                                               XFS_FSB_TO_DADDR(mp, rablkno),
                                               mp->m_quotainfo->qi_dqchunklen);
                                        rablkno++;
@@ -1546,18 +1463,34 @@ xfs_qm_dqiterate(
 /*
 * Called by dqusage_adjust in doing a quotacheck.
- * Given the inode, and a dquot (either USR or GRP, doesn't matter),
+ *
- * this updates its incore copy as well as the buffer copy. This is
+ * Given the inode, and a dquot id this updates both the incore dquot as well
- * so that once the quotacheck is done, we can just log all the buffers,
+ * as the buffer copy. This is so that once the quotacheck is done, we can
- * as opposed to logging numerous updates to individual dquots.
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
 */
-STATIC void
+STATIC int
 xfs_qm_quotacheck_dqadjust(
-        xfs_dquot_t             *dqp,
+        struct xfs_inode        *ip,
+        xfs_dqid_t              id,
+        uint                    type,
        xfs_qcnt_t              nblks,
        xfs_qcnt_t              rtblks)
 {
-        ASSERT(XFS_DQ_IS_LOCKED(dqp));
+        struct xfs_mount        *mp = ip->i_mount;
+        struct xfs_dquot        *dqp;
+        int                     error;
+        error = xfs_qm_dqget(mp, ip, id, type,
+                             XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+        if (error) {
+                /*
+                 * Shouldn't be able to turn off quotas here.
+                 */
+                ASSERT(error != ESRCH);
+                ASSERT(error != ENOENT);
+                return error;
+        }
        trace_xfs_dqadjust(dqp);
@@ -1582,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust(
         * There are no timers for the default values set in the root dquot.
         */
        if (dqp->q_core.d_id) {
-                xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
+                xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
-                xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
+                xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
        }
        dqp->dq_flags |= XFS_DQ_DIRTY;
+        xfs_qm_dqput(dqp);
+        return 0;
 }
 STATIC int
@@ -1629,8 +1564,7 @@ xfs_qm_dqusage_adjust(
        int             *res)           /* result code value */
 {
        xfs_inode_t     *ip;
-        xfs_dquot_t     *udqp, *gdqp;
+        xfs_qcnt_t      nblks, rtblks = 0;
-        xfs_qcnt_t      nblks, rtblks;
        int             error;
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1650,51 +1584,24 @@ xfs_qm_dqusage_adjust(
         * the case in all other instances. It's OK that we do this because
         * quotacheck is done only at mount time.
         */
-        if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
+        error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+        if (error) {
                *res = BULKSTAT_RV_NOTHING;
                return error;
        }
-        /*
+        ASSERT(ip->i_delayed_blks == 0);
-         * Obtain the locked dquots. In case of an error (eg. allocation
-         * fails for ENOSPC), we return the negative of the error number
-         * to bulkstat, so that it can get propagated to quotacheck() and
-         * making us disable quotas for the file system.
-         */
-        if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
-                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                IRELE(ip);
-                *res = BULKSTAT_RV_GIVEUP;
-                return error;
-        }
-        rtblks = 0;
+        if (XFS_IS_REALTIME_INODE(ip)) {
-        if (! XFS_IS_REALTIME_INODE(ip)) {
-                nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
-        } else {
                /*
                 * Walk thru the extent list and count the realtime blocks.
                 */
-                if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
+                error = xfs_qm_get_rtblks(ip, &rtblks);
-                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                if (error)
-                        IRELE(ip);
+                        goto error0;
-                        if (udqp)
-                                xfs_qm_dqput(udqp);
-                        if (gdqp)
-                                xfs_qm_dqput(gdqp);
-                        *res = BULKSTAT_RV_GIVEUP;
-                        return error;
-                }
-                nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
        }
-        ASSERT(ip->i_delayed_blks == 0);
-        /*
+        nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-         * We can't release the inode while holding its dquot locks.
-         * The inode can go into inactive and might try to acquire the dquotlocks.
-         * So, just unlock here and do a vn_rele at the end.
-         */
-        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        /*
         * Add the (disk blocks and inode) resources occupied by this
@@ -1709,26 +1616,36 @@ xfs_qm_dqusage_adjust(
         * and quotaoffs don't race. (Quotachecks happen at mount time only).
         */
        if (XFS_IS_UQUOTA_ON(mp)) {
-                ASSERT(udqp);
+                error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-                xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
+                                                   XFS_DQ_USER, nblks, rtblks);
-                xfs_qm_dqput(udqp);
+                if (error)
+                        goto error0;
        }
-        if (XFS_IS_OQUOTA_ON(mp)) {
-                ASSERT(gdqp);
+        if (XFS_IS_GQUOTA_ON(mp)) {
-                xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
+                error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-                xfs_qm_dqput(gdqp);
+                                                   XFS_DQ_GROUP, nblks, rtblks);
+                if (error)
+                        goto error0;
        }
-        /*
-         * Now release the inode. This will send it to 'inactive', and
-         * possibly even free blocks.
-         */
-        IRELE(ip);
-        /*
+        if (XFS_IS_PQUOTA_ON(mp)) {
-         * Goto next inode.
+                error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-         */
+                                                   XFS_DQ_PROJ, nblks, rtblks);
+                if (error)
+                        goto error0;
+        }
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        IRELE(ip);
        *res = BULKSTAT_RV_DIDONE;
        return 0;
+error0:
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        IRELE(ip);
+        *res = BULKSTAT_RV_GIVEUP;
+        return error;
 }
 /*
@@ -2224,7 +2141,7 @@ xfs_qm_write_sb_changes(
 /*
- * Given an inode, a uid and gid (from cred_t) make sure that we have
+ * Given an inode, a uid, gid and prid make sure that we have
 * allocated relevant dquot(s) on disk, and that we won't exceed inode
 * quotas by creating this file.
 * This also attaches dquot(s) to the given inode after locking it,
@@ -2332,7 +2249,7 @@ xfs_qm_vop_dqalloc(
                        xfs_dqunlock(gq);
                }
        } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-                if (ip->i_d.di_projid != prid) {
+                if (xfs_get_projid(ip) != prid) {
                        xfs_iunlock(ip, lockflags);
                        if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
                                                 XFS_DQ_PROJ,
@@ -2454,7 +2371,7 @@ xfs_qm_vop_chown_reserve(
        }
        if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
                if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-                     ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
+                     xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
                        prjflags = XFS_QMOPT_ENOSPC;
                if (prjflags ||
@@ -2558,7 +2475,7 @@ xfs_qm_vop_create_dqattach(
                ip->i_gdquot = gdqp;
                ASSERT(XFS_IS_OQUOTA_ON(mp));
                ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-                        ip->i_d.di_gid : ip->i_d.di_projid) ==
+                        ip->i_d.di_gid : xfs_get_projid(ip)) ==
                                be32_to_cpu(gdqp->q_core.d_id));
                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index bea02d786c5d..45b5cb1788ab 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -81,7 +81,7 @@ xfs_qm_statvfs(
        xfs_mount_t             *mp = ip->i_mount;
        xfs_dquot_t             *dqp;
-        if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
+        if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
                xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
                xfs_qm_dqput(dqp);
        }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 45e5849df238..bdebc183223e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile(
                goto out_unlock;
        }
-        xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 out_unlock:
@@ -875,21 +875,14 @@ xfs_dqrele_inode(
        struct xfs_perag        *pag,
        int                     flags)
 {
-        int                     error;
        /* skip quota inodes */
        if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
            ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
                ASSERT(ip->i_udquot == NULL);
                ASSERT(ip->i_gdquot == NULL);
-                read_unlock(&pag->pag_ici_lock);
                return 0;
        }
-        error = xfs_sync_inode_valid(ip, pag);
-        if (error)
-                return error;
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
                xfs_qm_dqrele(ip->i_udquot);
@@ -900,8 +893,6 @@ xfs_dqrele_inode(
                ip->i_gdquot = NULL;
        }
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-        IRELE(ip);
        return 0;
 }
@@ -918,8 +909,7 @@ xfs_qm_dqrele_all_inodes(
        uint             flags)
 {
        ASSERT(mp->m_quotainfo);
-        xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+        xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-                                XFS_ICI_NO_TAG, 0, NULL);
 }
 /*------------------------------------------------------------------------*/
@@ -1175,7 +1165,7 @@ xfs_qm_internalqcheck_adjust(
        }
        xfs_qm_internalqcheck_get_dquots(mp,
                                        (xfs_dqid_t) ip->i_d.di_uid,
-                                        (xfs_dqid_t) ip->i_d.di_projid,
+                                        (xfs_dqid_t) xfs_get_projid(ip),
                                        (xfs_dqid_t) ip->i_d.di_gid,
                                        &ud, &gd);
        if (XFS_IS_UQUOTA_ON(mp)) {
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4917d4eed4ed..63c7a1a6c022 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,15 @@ typedef struct xfs_perag {
        rwlock_t        pag_ici_lock;   /* incore inode lock */
        struct radix_tree_root pag_ici_root;    /* incore inode cache root */
        int             pag_ici_reclaimable;    /* reclaimable inodes */
+        struct mutex    pag_ici_reclaim_lock;   /* serialisation point */
+        unsigned long   pag_ici_reclaim_cursor; /* reclaim restart point */
+        /* buffer cache index */
+        spinlock_t      pag_buf_lock;   /* lock for pag_buf_tree */
+        struct rb_root  pag_buf_tree;   /* ordered tree of active buffers */
+        /* for rcu-safe freeing */
+        struct rcu_head rcu_head;
 #endif
        int             pagb_count;     /* pagb slots in use */
 } xfs_perag_t;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index af168faccc7a..112abc439ca5 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -675,7 +675,7 @@ xfs_alloc_ag_vextent_near(
        xfs_agblock_t   gtbnoa;         /* aligned ... */
        xfs_extlen_t    gtdiff;         /* difference to right side entry */
        xfs_extlen_t    gtlen;          /* length of right side entry */
-        xfs_extlen_t    gtlena;         /* aligned ... */
+        xfs_extlen_t    gtlena = 0;     /* aligned ... */
        xfs_agblock_t   gtnew;          /* useful start bno of right side */
        int             error;          /* error code */
        int             i;              /* result code, temporary */
@@ -684,7 +684,7 @@ xfs_alloc_ag_vextent_near(
        xfs_agblock_t   ltbnoa;         /* aligned ... */
        xfs_extlen_t    ltdiff;         /* difference to left side entry */
        xfs_extlen_t    ltlen;          /* length of left side entry */
-        xfs_extlen_t    ltlena;         /* aligned ... */
+        xfs_extlen_t    ltlena = 0;     /* aligned ... */
        xfs_agblock_t   ltnew;          /* useful start bno of left side */
        xfs_extlen_t    rlen;           /* length of returned extent */
 #if defined(DEBUG) && defined(__KERNEL__)
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 97f7328967fd..3916925e2584 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -280,38 +280,6 @@ xfs_allocbt_key_diff(
        return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
-STATIC int
-xfs_allocbt_kill_root(
-        struct xfs_btree_cur    *cur,
-        struct xfs_buf          *bp,
-        int                     level,
-        union xfs_btree_ptr     *newroot)
-{
-        int                     error;
-        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-        XFS_BTREE_STATS_INC(cur, killroot);
-        /*
-         * Update the root pointer, decreasing the level by 1 and then
-         * free the old root.
-         */
-        xfs_allocbt_set_root(cur, newroot, -1);
-        error = xfs_allocbt_free_block(cur, bp);
-        if (error) {
-                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-                return error;
-        }
-        XFS_BTREE_STATS_INC(cur, free);
-        xfs_btree_setbuf(cur, level, NULL);
-        cur->bc_nlevels--;
-        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-        return 0;
-}
 #ifdef DEBUG
 STATIC int
 xfs_allocbt_keys_inorder(
@@ -423,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
        .dup_cursor             = xfs_allocbt_dup_cursor,
        .set_root               = xfs_allocbt_set_root,
-        .kill_root              = xfs_allocbt_kill_root,
        .alloc_block            = xfs_allocbt_alloc_block,
        .free_block             = xfs_allocbt_free_block,
        .update_lastrec         = xfs_allocbt_update_lastrec,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index c2568242a901..c86375378810 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -355,16 +355,15 @@ xfs_attr_set_int(
                        if (mp->m_flags & XFS_MOUNT_WSYNC) {
                                xfs_trans_set_sync(args.trans);
                        }
+                        if (!error && (flags & ATTR_KERNOTIME) == 0) {
+                                xfs_trans_ichgtime(args.trans, dp,
+                                                        XFS_ICHGTIME_CHG);
+                        }
                        err2 = xfs_trans_commit(args.trans,
                                                 XFS_TRANS_RELEASE_LOG_RES);
                        xfs_iunlock(dp, XFS_ILOCK_EXCL);
-                        /*
-                         * Hit the inode change time.
-                         */
-                        if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                                xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-                        }
                        return(error == 0 ? err2 : error);
                }
@@ -420,6 +419,9 @@ xfs_attr_set_int(
                xfs_trans_set_sync(args.trans);
        }
+        if ((flags & ATTR_KERNOTIME) == 0)
+                xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
        /*
         * Commit the last in the sequence of transactions.
         */
@@ -427,13 +429,6 @@ xfs_attr_set_int(
        error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
-        /*
-         * Hit the inode change time.
-         */
-        if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-        }
        return(error);
 out:
@@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
                xfs_trans_set_sync(args.trans);
        }
+        if ((flags & ATTR_KERNOTIME) == 0)
+                xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
        /*
         * Commit the last in the sequence of transactions.
         */
@@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
        error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
-        /*
-         * Hit the inode change time.
-         */
-        if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-        }
        return(error);
 out:
@@ -1995,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
                        tmp = (valuelen < XFS_BUF_SIZE(bp))
                                ? valuelen : XFS_BUF_SIZE(bp);
-                        xfs_biomove(bp, 0, tmp, dst, XBF_READ);
+                        xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
                        xfs_buf_relse(bp);
                        dst += tmp;
                        valuelen -= tmp;
@@ -2125,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
                tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
                                                        XFS_BUF_SIZE(bp);
-                xfs_biomove(bp, 0, tmp, src, XBF_WRITE);
+                xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
                if (tmp < XFS_BUF_SIZE(bp))
-                        xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
+                        xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
                if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
                        return (error);
                }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index f90dadd5a968..8abd12e32e13 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -614,7 +614,7 @@ xfs_bmap_add_extent(
                        nblks += cur->bc_private.b.allocated;
                ASSERT(nblks <= da_old);
                if (nblks < da_old)
-                        xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+                        xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
                                (int64_t)(da_old - nblks), rsvd);
        }
        /*
@@ -1079,7 +1079,8 @@ xfs_bmap_add_extent_delay_real(
                diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
                        (cur ? cur->bc_private.b.allocated : 0));
                if (diff > 0 &&
-                    xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) {
+                    xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
+                                             -((int64_t)diff), rsvd)) {
                        /*
                         * Ick gross gag me with a spoon.
                         */
@@ -1089,16 +1090,18 @@ xfs_bmap_add_extent_delay_real(
                                        temp--;
                                        diff--;
                                        if (!diff ||
-                                            !xfs_mod_incore_sb(ip->i_mount,
+                                            !xfs_icsb_modify_counters(ip->i_mount,
-                                                    XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
+                                                    XFS_SBS_FDBLOCKS,
+                                                    -((int64_t)diff), rsvd))
                                                break;
                                }
                                if (temp2) {
                                        temp2--;
                                        diff--;
                                        if (!diff ||
-                                            !xfs_mod_incore_sb(ip->i_mount,
+                                            !xfs_icsb_modify_counters(ip->i_mount,
-                                                    XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
+                                                    XFS_SBS_FDBLOCKS,
+                                                    -((int64_t)diff), rsvd))
                                                break;
                                }
                        }
@@ -1766,7 +1769,7 @@ xfs_bmap_add_extent_hole_delay(
        }
        if (oldlen != newlen) {
                ASSERT(oldlen > newlen);
-                xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+                xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
                        (int64_t)(oldlen - newlen), rsvd);
                /*
                 * Nothing to do for disk quota accounting here.
@@ -3111,9 +3114,10 @@ xfs_bmap_del_extent(
         * Nothing to do for disk quota accounting here.
         */
        ASSERT(da_old >= da_new);
-        if (da_old > da_new)
+        if (da_old > da_new) {
-                xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
+                xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                        rsvd);
+                        (int64_t)(da_old - da_new), rsvd);
+        }
 done:
        *logflagsp = flags;
        return error;
@@ -4526,13 +4530,13 @@ xfs_bmapi(
                                                        -((int64_t)extsz), (flags &
                                                        XFS_BMAPI_RSVBLOCKS));
                                } else {
-                                        error = xfs_mod_incore_sb(mp,
+                                        error = xfs_icsb_modify_counters(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -((int64_t)alen), (flags &
                                                        XFS_BMAPI_RSVBLOCKS));
                                }
                                if (!error) {
-                                        error = xfs_mod_incore_sb(mp,
+                                        error = xfs_icsb_modify_counters(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -((int64_t)indlen), (flags &
                                                        XFS_BMAPI_RSVBLOCKS));
@@ -4542,7 +4546,7 @@ xfs_bmapi(
                                                        (int64_t)extsz, (flags &
                                                        XFS_BMAPI_RSVBLOCKS));
                                        else if (error)
-                                                xfs_mod_incore_sb(mp,
+                                                xfs_icsb_modify_counters(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        (int64_t)alen, (flags &
                                                        XFS_BMAPI_RSVBLOCKS));
@@ -4744,8 +4748,12 @@ xfs_bmapi(
                 * Check if writing previously allocated but
                 * unwritten extents.
                 */
-                if (wr && mval->br_state == XFS_EXT_UNWRITTEN &&
+                if (wr &&
-                    ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) {
+                    ((mval->br_state == XFS_EXT_UNWRITTEN &&
+                      ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
+                     (mval->br_state == XFS_EXT_NORM &&
+                      ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
+                                (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
                        /*
                         * Modify (by adding) the state flag, if writing.
                         */
@@ -4757,7 +4765,9 @@ xfs_bmapi(
                                        *firstblock;
                                cur->bc_private.b.flist = flist;
                        }
-                        mval->br_state = XFS_EXT_NORM;
+                        mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
+                                                ? XFS_EXT_NORM
+                                                : XFS_EXT_UNWRITTEN;
                        error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
                                firstblock, flist, &tmp_logflags,
                                whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
@@ -5200,7 +5210,7 @@ xfs_bunmapi(
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
-                                xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+                                xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
                                                (int64_t)del.br_blockcount, rsvd);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index b13569a6179b..71ec9b6ecdfc 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -74,9 +74,12 @@ typedef	struct xfs_bmap_free
 #define XFS_BMAPI_IGSTATE       0x080   /* Ignore state - */
                                        /* combine contig. space */
 #define XFS_BMAPI_CONTIG        0x100   /* must allocate only one extent */
-#define XFS_BMAPI_CONVERT       0x200   /* unwritten extent conversion - */
+/*
-                                        /* need write cache flushing and no */
+ * unwritten extent conversion - this needs write cache flushing and no additional
-                                        /* additional allocation alignments */
+ * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
+ * from written to unwritten, otherwise it converts from unwritten to written.
+ */
+#define XFS_BMAPI_CONVERT       0x200
 #define XFS_BMAPI_FLAGS \
        { XFS_BMAPI_WRITE,      "WRITE" }, \
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 829af92f0fba..04f9cca8da7e 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -217,7 +217,7 @@ xfs_btree_del_cursor(
         */
        for (i = 0; i < cur->bc_nlevels; i++) {
                if (cur->bc_bufs[i])
-                        xfs_btree_setbuf(cur, i, NULL);
+                        xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
                else if (!error)
                        break;
        }
@@ -656,7 +656,7 @@ xfs_btree_reada_bufl(
        ASSERT(fsbno != NULLFSBLOCK);
        d = XFS_FSB_TO_DADDR(mp, fsbno);
-        xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
+        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
 }
 /*
@@ -676,7 +676,7 @@ xfs_btree_reada_bufs(
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agbno != NULLAGBLOCK);
        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-        xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
+        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
 }
 STATIC int
@@ -763,22 +763,19 @@ xfs_btree_readahead(
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
-void
+STATIC void
 xfs_btree_setbuf(
        xfs_btree_cur_t         *cur,   /* btree cursor */
        int                     lev,    /* level in btree */
        xfs_buf_t               *bp)    /* new buffer to set */
 {
        struct xfs_btree_block  *b;     /* btree block */
-        xfs_buf_t               *obp;   /* old buffer pointer */
-        obp = cur->bc_bufs[lev];
+        if (cur->bc_bufs[lev])
-        if (obp)
+                xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
-                xfs_trans_brelse(cur->bc_tp, obp);
        cur->bc_bufs[lev] = bp;
        cur->bc_ra[lev] = 0;
-        if (!bp)
-                return;
        b = XFS_BUF_TO_BLOCK(bp);
        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
                if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
@@ -3011,6 +3008,43 @@ out0:
        return 0;
 }
+/*
+ * Kill the current root node, and replace it with its only child node.
+ */
+STATIC int
+xfs_btree_kill_root(
+        struct xfs_btree_cur    *cur,
+        struct xfs_buf          *bp,
+        int                     level,
+        union xfs_btree_ptr     *newroot)
+{
+        int                     error;
+        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+        XFS_BTREE_STATS_INC(cur, killroot);
+        /*
+         * Update the root pointer, decreasing the level by 1 and then
+         * free the old root.
+         */
+        cur->bc_ops->set_root(cur, newroot, -1);
+        error = cur->bc_ops->free_block(cur, bp);
+        if (error) {
+                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+                return error;
+        }
+        XFS_BTREE_STATS_INC(cur, free);
+        cur->bc_bufs[level] = NULL;
+        cur->bc_ra[level] = 0;
+        cur->bc_nlevels--;
+        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+        return 0;
+}
 STATIC int
 xfs_btree_dec_cursor(
        struct xfs_btree_cur    *cur,
@@ -3195,7 +3229,7 @@ xfs_btree_delrec(
                         * Make it the new root of the btree.
                         */
                        pp = xfs_btree_ptr_addr(cur, 1, block);
-                        error = cur->bc_ops->kill_root(cur, bp, level, pp);
+                        error = xfs_btree_kill_root(cur, bp, level, pp);
                        if (error)
                                goto error0;
                } else if (level > 0) {
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7fa07062bdda..82fafc66bd1f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -152,9 +152,7 @@ struct xfs_btree_ops {
        /* update btree root pointer */
        void    (*set_root)(struct xfs_btree_cur *cur,
-                                union xfs_btree_ptr *nptr, int level_change);
+                            union xfs_btree_ptr *nptr, int level_change);
-        int     (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
-                                int level, union xfs_btree_ptr *newroot);
        /* block allocation / freeing */
        int     (*alloc_block)(struct xfs_btree_cur *cur,
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs(
        xfs_agblock_t           agbno,  /* allocation group block number */
        xfs_extlen_t            count); /* count of filesystem blocks */
-/*
- * Set the buffer for level "lev" in the cursor to bp, releasing
- * any previous buffer.
- */
-void
-xfs_btree_setbuf(
-        xfs_btree_cur_t         *cur,   /* btree cursor */
-        int                     lev,    /* level in btree */
-        struct xfs_buf          *bp);   /* new buffer to set */
 /*
 * Common btree core entry points.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1b09d7a280df..2686d0d54c5b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -692,8 +692,7 @@ xfs_buf_item_init(
         * the first.  If we do already have one, there is
         * nothing to do here so return.
         */
-        if (bp->b_mount != mp)
+        ASSERT(bp->b_target->bt_mount == mp);
-                bp->b_mount = mp;
        if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
                lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
                if (lip->li_type == XFS_LI_BUF) {
@@ -974,7 +973,7 @@ xfs_buf_iodone_callbacks(
                        xfs_buf_do_callbacks(bp, lip);
                        XFS_BUF_SET_FSPRIVATE(bp, NULL);
                        XFS_BUF_CLR_IODONE_FUNC(bp);
-                        xfs_biodone(bp);
+                        xfs_buf_ioend(bp, 0);
                        return;
                }
@@ -1033,7 +1032,7 @@ xfs_buf_iodone_callbacks(
        xfs_buf_do_callbacks(bp, lip);
        XFS_BUF_SET_FSPRIVATE(bp, NULL);
        XFS_BUF_CLR_IODONE_FUNC(bp);
-        xfs_biodone(bp);
+        xfs_buf_ioend(bp, 0);
 }
 /*
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 30fa0e206fba..1c00bedb3175 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2042,7 +2042,7 @@ xfs_da_do_buf(
                                mappedbno, nmapped, 0, &bp);
                        break;
                case 3:
-                        xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
+                        xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
                        error = 0;
                        bp = NULL;
                        break;
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5b153b2e6a3..dffba9ba0db6 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -49,8 +49,9 @@ typedef struct xfs_dinode {
        __be32          di_uid;         /* owner's user id */
        __be32          di_gid;         /* owner's group id */
        __be32          di_nlink;       /* number of links to file */
-        __be16          di_projid;      /* owner's project id */
+        __be16          di_projid_lo;   /* lower part of owner's project id */
-        __u8            di_pad[8];      /* unused, zeroed space */
+        __be16          di_projid_hi;   /* higher part of owner's project id */
+        __u8            di_pad[6];      /* unused, zeroed space */
        __be16          di_flushiter;   /* incremented on flush */
        xfs_timestamp_t di_atime;       /* time last accessed */
        xfs_timestamp_t di_mtime;       /* time last modified */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 504be8640e91..ae891223be90 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -961,7 +961,7 @@ xfs_dir2_leaf_getdents(
                                if (i > ra_current &&
                                    map[ra_index].br_blockcount >=
                                    mp->m_dirblkfsbs) {
-                                        xfs_baread(mp->m_ddev_targp,
+                                        xfs_buf_readahead(mp->m_ddev_targp,
                                                XFS_FSB_TO_DADDR(mp,
                                                   map[ra_index].br_startblock +
                                                   ra_offset),
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 87c2e9d02288..8f6fc1a96386 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -293,9 +293,11 @@ typedef struct xfs_bstat {
        __s32           bs_extsize;     /* extent size                  */
        __s32           bs_extents;     /* number of extents            */
        __u32           bs_gen;         /* generation count             */
-        __u16           bs_projid;      /* project id                   */
+        __u16           bs_projid_lo;   /* lower part of project id     */
+#define bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
        __u16           bs_forkoff;     /* inode fork offset in bytes   */
-        unsigned char   bs_pad[12];     /* pad space, unused            */
+        __u16           bs_projid_hi;   /* higher part of project id    */
+        unsigned char   bs_pad[10];     /* pad space, unused            */
        __u32           bs_dmevmask;    /* DMIG event mask              */
        __u16           bs_dmstate;     /* DMIG state info              */
        __u16           bs_aextents;    /* attribute number of extents  */
@@ -448,6 +450,7 @@ typedef struct xfs_handle {
 /*      XFS_IOC_SETBIOSIZE ---- deprecated 46      */
 /*      XFS_IOC_GETBIOSIZE ---- deprecated 47      */
 #define XFS_IOC_GETBMAPX        _IOWR('X', 56, struct getbmap)
+#define XFS_IOC_ZERO_RANGE      _IOW ('X', 57, struct xfs_flock64)
 /*
 * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 43b1d5699335..a7c116e814af 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -144,12 +144,11 @@ xfs_growfs_data_private(
        if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
                return error;
        dpct = pct - mp->m_sb.sb_imax_pct;
-        error = xfs_read_buf(mp, mp->m_ddev_targp,
+        bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
-                        XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
+                                XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
-                        XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                                BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
-        if (error)
+        if (!bp)
-                return error;
+                return EIO;
-        ASSERT(bp);
        xfs_buf_relse(bp);
        new = nb;       /* use new as a temporary here */
@@ -597,7 +596,8 @@ out:
                 * the extra reserve blocks from the reserve.....
                 */
                int error;
-                error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0);
+                error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                 fdblks_delta, 0);
                if (error == ENOSPC)
                        goto retry;
        }
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5371d2dc360e..0626a32c3447 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -212,7 +212,7 @@ xfs_ialloc_inode_init(
                 *      to log a whole cluster of inodes instead of all the
                 *      individual transactions causing a lot of log traffic.
                 */
-                xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
+                xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
                for (i = 0; i < ninodes; i++) {
                        int     ioffset = i << mp->m_sb.sb_inodelog;
                        uint    isize = sizeof(struct xfs_dinode);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index d352862cefa0..16921f55c542 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -183,38 +183,6 @@ xfs_inobt_key_diff(
                          cur->bc_rec.i.ir_startino;
 }
-STATIC int
-xfs_inobt_kill_root(
-        struct xfs_btree_cur    *cur,
-        struct xfs_buf          *bp,
-        int                     level,
-        union xfs_btree_ptr     *newroot)
-{
-        int                     error;
-        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-        XFS_BTREE_STATS_INC(cur, killroot);
-        /*
-         * Update the root pointer, decreasing the level by 1 and then
-         * free the old root.
-         */
-        xfs_inobt_set_root(cur, newroot, -1);
-        error = xfs_inobt_free_block(cur, bp);
-        if (error) {
-                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-                return error;
-        }
-        XFS_BTREE_STATS_INC(cur, free);
-        cur->bc_bufs[level] = NULL;
-        cur->bc_nlevels--;
-        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-        return 0;
-}
 #ifdef DEBUG
 STATIC int
 xfs_inobt_keys_inorder(
@@ -309,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
        .dup_cursor             = xfs_inobt_dup_cursor,
        .set_root               = xfs_inobt_set_root,
-        .kill_root              = xfs_inobt_kill_root,
        .alloc_block            = xfs_inobt_alloc_block,
        .free_block             = xfs_inobt_free_block,
        .get_minrecs            = xfs_inobt_get_minrecs,
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b1ecc6f97ade..0cdd26932d8e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -365,8 +365,8 @@ xfs_iget(
        xfs_perag_t     *pag;
        xfs_agino_t     agino;
-        /* the radix tree exists only in inode capable AGs */
+        /* reject inode numbers outside existing AGs */
-        if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+        if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
                return EINVAL;
        /* get the perag structure and ensure that it's inode capable */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 34798f391c49..108c7a085f94 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -660,7 +660,8 @@ xfs_dinode_from_disk(
        to->di_uid = be32_to_cpu(from->di_uid);
        to->di_gid = be32_to_cpu(from->di_gid);
        to->di_nlink = be32_to_cpu(from->di_nlink);
-        to->di_projid = be16_to_cpu(from->di_projid);
+        to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+        to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
        to->di_flushiter = be16_to_cpu(from->di_flushiter);
        to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
@@ -695,7 +696,8 @@ xfs_dinode_to_disk(
        to->di_uid = cpu_to_be32(from->di_uid);
        to->di_gid = cpu_to_be32(from->di_gid);
        to->di_nlink = cpu_to_be32(from->di_nlink);
-        to->di_projid = cpu_to_be16(from->di_projid);
+        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
        to->di_flushiter = cpu_to_be16(from->di_flushiter);
        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
@@ -874,7 +876,7 @@ xfs_iread(
        if (ip->i_d.di_version == 1) {
                ip->i_d.di_nlink = ip->i_d.di_onlink;
                ip->i_d.di_onlink = 0;
-                ip->i_d.di_projid = 0;
+                xfs_set_projid(ip, 0);
        }
        ip->i_delayed_blks = 0;
@@ -982,8 +984,7 @@ xfs_ialloc(
        mode_t          mode,
        xfs_nlink_t     nlink,
        xfs_dev_t       rdev,
-        cred_t          *cr,
+        prid_t          prid,
-        xfs_prid_t      prid,
        int             okalloc,
        xfs_buf_t       **ialloc_context,
        boolean_t       *call_again,
@@ -1027,7 +1028,7 @@ xfs_ialloc(
        ASSERT(ip->i_d.di_nlink == nlink);
        ip->i_d.di_uid = current_fsuid();
        ip->i_d.di_gid = current_fsgid();
-        ip->i_d.di_projid = prid;
+        xfs_set_projid(ip, prid);
        memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
        /*
@@ -2725,7 +2726,7 @@ cluster_corrupt_out:
                        XFS_BUF_UNDONE(bp);
                        XFS_BUF_STALE(bp);
                        XFS_BUF_ERROR(bp,EIO);
-                        xfs_biodone(bp);
+                        xfs_buf_ioend(bp, 0);
                } else {
                        XFS_BUF_STALE(bp);
                        xfs_buf_relse(bp);
@@ -3008,7 +3009,7 @@ xfs_iflush_int(
                        memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
                        memset(&(dip->di_pad[0]), 0,
                              sizeof(dip->di_pad));
-                        ASSERT(ip->i_d.di_projid == 0);
+                        ASSERT(xfs_get_projid(ip) == 0);
                }
        }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0898c5417d12..fac52290de90 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -134,8 +134,9 @@ typedef struct xfs_icdinode {
        __uint32_t      di_uid;         /* owner's user id */
        __uint32_t      di_gid;         /* owner's group id */
        __uint32_t      di_nlink;       /* number of links to file */
-        __uint16_t      di_projid;      /* owner's project id */
+        __uint16_t      di_projid_lo;   /* lower part of owner's project id */
-        __uint8_t       di_pad[8];      /* unused, zeroed space */
+        __uint16_t      di_projid_hi;   /* higher part of owner's project id */
+        __uint8_t       di_pad[6];      /* unused, zeroed space */
        __uint16_t      di_flushiter;   /* incremented on flush */
        xfs_ictimestamp_t di_atime;     /* time last accessed */
        xfs_ictimestamp_t di_mtime;     /* time last modified */
@@ -212,7 +213,6 @@ typedef struct xfs_icdinode {
 #ifdef __KERNEL__
 struct bhv_desc;
-struct cred;
 struct xfs_buf;
 struct xfs_bmap_free;
 struct xfs_bmbt_irec;
@@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 }
 /*
+ * Project quota id helpers (previously projid was 16bit only,
+ * and using two 16bit values to hold the new 32bit projid was chosen
+ * to retain compatibility with "old" filesystems).
+ */
+static inline prid_t
+xfs_get_projid(struct xfs_inode *ip)
+{
+        return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
+}
+static inline void
+xfs_set_projid(struct xfs_inode *ip,
+                prid_t projid)
+{
+        ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
+        ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+}
+/*
 * Manage the i_flush queue embedded in the inode.  This completion
 * queue synchronizes processes attempting to flush the in-core
 * inode back to disk.
@@ -456,8 +475,8 @@ void		xfs_inode_free(struct xfs_inode *ip);
 * xfs_inode.c prototypes.
 */
 int             xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
-                           xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
+                           xfs_nlink_t, xfs_dev_t, prid_t, int,
-                           int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
+                           struct xfs_buf **, boolean_t *, xfs_inode_t **);
 uint            xfs_ip2xflags(struct xfs_inode *);
 uint            xfs_dic2xflags(struct xfs_dinode *);
@@ -471,7 +490,6 @@ int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void            xfs_iext_realloc(xfs_inode_t *, int, int);
 void            xfs_iunpin_wait(xfs_inode_t *);
 int             xfs_iflush(xfs_inode_t *, uint);
-void            xfs_ichgtime(xfs_inode_t *, int);
 void            xfs_lock_inodes(xfs_inode_t **, int, uint);
 void            xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fe00777e2796..c7ac020705df 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -223,15 +223,6 @@ xfs_inode_item_format(
        nvecs        = 1;
        /*
-         * Make sure the linux inode is dirty. We do this before
-         * clearing i_update_core as the VFS will call back into
-         * XFS here and set i_update_core, so we need to dirty the
-         * inode first so that the ordering of i_update_core and
-         * unlogged modifications still works as described below.
-         */
-        xfs_mark_inode_dirty_sync(ip);
-        /*
         * Clear i_update_core if the timestamps (or any other
         * non-transactional modification) need flushing/logging
         * and we're about to log them with the rest of the core.
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 7e3626e5925c..dc1882adaf54 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -92,7 +92,8 @@ xfs_bulkstat_one_int(
         * further change.
         */
        buf->bs_nlink = dic->di_nlink;
-        buf->bs_projid = dic->di_projid;
+        buf->bs_projid_lo = dic->di_projid_lo;
+        buf->bs_projid_hi = dic->di_projid_hi;
        buf->bs_ino = ino;
        buf->bs_mode = dic->di_mode;
        buf->bs_uid = dic->di_uid;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ba8e36e0b4e7..cee4ab9f8a9e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1118,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t	*mp,
                iclog->ic_prev = prev_iclog;
                prev_iclog = iclog;
-                bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+                bp = xfs_buf_get_uncached(mp->m_logdev_targp,
+                                                log->l_iclog_size, 0);
                if (!bp)
                        goto out_free_iclog;
                if (!XFS_BUF_CPSEMA(bp))
@@ -1296,7 +1297,7 @@ xlog_bdstrat(
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
                XFS_BUF_ERROR(bp, EIO);
                XFS_BUF_STALE(bp);
-                xfs_biodone(bp);
+                xfs_buf_ioend(bp, 0);
                /*
                 * It would seem logical to return EIO here, but we rely on
                 * the log state machine to propagate I/O errors instead of
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7e206fc1fa36..23d6ceb5e97b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -146,102 +146,6 @@ xlog_cil_init_post_recovery(
 }
 /*
- * Insert the log item into the CIL and calculate the difference in space
- * consumed by the item. Add the space to the checkpoint ticket and calculate
- * if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
- * the current transaction ticket so that the accounting works out correctly.
- *
- * If this is the first time the item is being placed into the CIL in this
- * context, pin it so it can't be written to disk until the CIL is flushed to
- * the iclog and the iclog written to disk.
- */
-static void
-xlog_cil_insert(
-        struct log              *log,
-        struct xlog_ticket      *ticket,
-        struct xfs_log_item     *item,
-        struct xfs_log_vec      *lv)
-{
-        struct xfs_cil          *cil = log->l_cilp;
-        struct xfs_log_vec      *old = lv->lv_item->li_lv;
-        struct xfs_cil_ctx      *ctx = cil->xc_ctx;
-        int                     len;
-        int                     diff_iovecs;
-        int                     iclog_space;
-        if (old) {
-                /* existing lv on log item, space used is a delta */
-                ASSERT(!list_empty(&item->li_cil));
-                ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-                len = lv->lv_buf_len - old->lv_buf_len;
-                diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
-                kmem_free(old->lv_buf);
-                kmem_free(old);
-        } else {
-                /* new lv, must pin the log item */
-                ASSERT(!lv->lv_item->li_lv);
-                ASSERT(list_empty(&item->li_cil));
-                len = lv->lv_buf_len;
-                diff_iovecs = lv->lv_niovecs;
-                IOP_PIN(lv->lv_item);
-        }
-        len += diff_iovecs * sizeof(xlog_op_header_t);
-        /* attach new log vector to log item */
-        lv->lv_item->li_lv = lv;
-        spin_lock(&cil->xc_cil_lock);
-        list_move_tail(&item->li_cil, &cil->xc_cil);
-        ctx->nvecs += diff_iovecs;
-        /*
-         * If this is the first time the item is being committed to the CIL,
-         * store the sequence number on the log item so we can tell
-         * in future commits whether this is the first checkpoint the item is
-         * being committed into.
-         */
-        if (!item->li_seq)
-                item->li_seq = ctx->sequence;
-        /*
-         * Now transfer enough transaction reservation to the context ticket
-         * for the checkpoint. The context ticket is special - the unit
-         * reservation has to grow as well as the current reservation as we
-         * steal from tickets so we can correctly determine the space used
-         * during the transaction commit.
-         */
-        if (ctx->ticket->t_curr_res == 0) {
-                /* first commit in checkpoint, steal the header reservation */
-                ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
-                ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-                ticket->t_curr_res -= ctx->ticket->t_unit_res;
-        }
-        /* do we need space for more log record headers? */
-        iclog_space = log->l_iclog_size - log->l_iclog_hsize;
-        if (len > 0 && (ctx->space_used / iclog_space !=
-                                (ctx->space_used + len) / iclog_space)) {
-                int hdrs;
-                hdrs = (len + iclog_space - 1) / iclog_space;
-                /* need to take into account split region headers, too */
-                hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
-                ctx->ticket->t_unit_res += hdrs;
-                ctx->ticket->t_curr_res += hdrs;
-                ticket->t_curr_res -= hdrs;
-                ASSERT(ticket->t_curr_res >= len);
-        }
-        ticket->t_curr_res -= len;
-        ctx->space_used += len;
-        spin_unlock(&cil->xc_cil_lock);
-}
-/*
 * Format log item into a flat buffers
 *
 * For delayed logging, we need to hold a formatted buffer containing all the
@@ -286,7 +190,7 @@ xlog_cil_format_items(
                        len += lv->lv_iovecp[index].i_len;
                lv->lv_buf_len = len;
-                lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
+                lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
                ptr = lv->lv_buf;
                for (index = 0; index < lv->lv_niovecs; index++) {
@@ -300,21 +204,136 @@ xlog_cil_format_items(
        }
 }
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+        struct log              *log,
+        struct xfs_log_vec      *lv,
+        int                     *len,
+        int                     *diff_iovecs)
+{
+        struct xfs_log_vec      *old = lv->lv_item->li_lv;
+        if (old) {
+                /* existing lv on log item, space used is a delta */
+                ASSERT(!list_empty(&lv->lv_item->li_cil));
+                ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+                *len += lv->lv_buf_len - old->lv_buf_len;
+                *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+                kmem_free(old->lv_buf);
+                kmem_free(old);
+        } else {
+                /* new lv, must pin the log item */
+                ASSERT(!lv->lv_item->li_lv);
+                ASSERT(list_empty(&lv->lv_item->li_cil));
+                *len += lv->lv_buf_len;
+                *diff_iovecs += lv->lv_niovecs;
+                IOP_PIN(lv->lv_item);
+        }
+        /* attach new log vector to log item */
+        lv->lv_item->li_lv = lv;
+        /*
+         * If this is the first time the item is being committed to the
+         * CIL, store the sequence number on the log item so we can
+         * tell in future commits whether this is the first checkpoint
+         * the item is being committed into.
+         */
+        if (!lv->lv_item->li_seq)
+                lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+/*
+ * Insert the log items into the CIL and calculate the difference in space
+ * consumed by the items. Add the space to the checkpoint ticket and calculate
+ * if the change requires additional log metadata. If it does, take that space
+ * as well. Remove the amount of space we added to the checkpoint ticket from
+ * the current transaction ticket so that the accounting works out correctly.
+ */
 static void
 xlog_cil_insert_items(
        struct log              *log,
        struct xfs_log_vec      *log_vector,
-        struct xlog_ticket      *ticket,
+        struct xlog_ticket      *ticket)
-        xfs_lsn_t               *start_lsn)
 {
-        struct xfs_log_vec *lv;
+        struct xfs_cil          *cil = log->l_cilp;
+        struct xfs_cil_ctx      *ctx = cil->xc_ctx;
-        if (start_lsn)
+        struct xfs_log_vec      *lv;
-                *start_lsn = log->l_cilp->xc_ctx->sequence;
+        int                     len = 0;
+        int                     diff_iovecs = 0;
+        int                     iclog_space;
        ASSERT(log_vector);
+        /*
+         * Do all the accounting aggregation and switching of log vectors
+         * around in a separate loop to the insertion of items into the CIL.
+         * Then we can do a separate loop to update the CIL within a single
+         * lock/unlock pair. This reduces the number of round trips on the CIL
+         * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+         * hold time for the transaction commit.
+         *
+         * If this is the first time the item is being placed into the CIL in
+         * this context, pin it so it can't be written to disk until the CIL is
+         * flushed to the iclog and the iclog written to disk.
+         *
+         * We can do this safely because the context can't checkpoint until we
+         * are done so it doesn't matter exactly how we update the CIL.
+         */
+        for (lv = log_vector; lv; lv = lv->lv_next)
+                xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
+        /* account for space used by new iovec headers  */
+        len += diff_iovecs * sizeof(xlog_op_header_t);
+        spin_lock(&cil->xc_cil_lock);
+        /* move the items to the tail of the CIL */
        for (lv = log_vector; lv; lv = lv->lv_next)
-                xlog_cil_insert(log, ticket, lv->lv_item, lv);
+                list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+        ctx->nvecs += diff_iovecs;
+        /*
+         * Now transfer enough transaction reservation to the context ticket
+         * for the checkpoint. The context ticket is special - the unit
+         * reservation has to grow as well as the current reservation as we
+         * steal from tickets so we can correctly determine the space used
+         * during the transaction commit.
+         */
+        if (ctx->ticket->t_curr_res == 0) {
+                /* first commit in checkpoint, steal the header reservation */
+                ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
+                ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
+                ticket->t_curr_res -= ctx->ticket->t_unit_res;
+        }
+        /* do we need space for more log record headers? */
+        iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+        if (len > 0 && (ctx->space_used / iclog_space !=
+                                (ctx->space_used + len) / iclog_space)) {
+                int hdrs;
+                hdrs = (len + iclog_space - 1) / iclog_space;
+                /* need to take into account split region headers, too */
+                hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+                ctx->ticket->t_unit_res += hdrs;
+                ctx->ticket->t_curr_res += hdrs;
+                ticket->t_curr_res -= hdrs;
+                ASSERT(ticket->t_curr_res >= len);
+        }
+        ticket->t_curr_res -= len;
+        ctx->space_used += len;
+        spin_unlock(&cil->xc_cil_lock);
 }
 static void
@@ -638,7 +657,10 @@ xfs_log_commit_cil(
        /* lock out background commit */
        down_read(&log->l_cilp->xc_ctx_lock);
-        xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+        if (commit_lsn)
+                *commit_lsn = log->l_cilp->xc_ctx->sequence;
+        xlog_cil_insert_items(log, log_vector, tp->t_ticket);
        /* check we didn't blow the reservation */
        if (tp->t_ticket->t_curr_res < 0)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6f3f5fa37acf..966d3f97458c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -107,7 +107,8 @@ xlog_get_bp(
                nbblks += log->l_sectBBsize;
        nbblks = round_up(nbblks, log->l_sectBBsize);
-        return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
+        return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
+                                        BBTOB(nbblks), 0);
 }
 STATIC void
@@ -167,7 +168,7 @@ xlog_bread_noalign(
        XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
        xfsbdstrat(log->l_mp, bp);
-        error = xfs_iowait(bp);
+        error = xfs_buf_iowait(bp);
        if (error)
                xfs_ioerror_alert("xlog_bread", log->l_mp,
                                  bp, XFS_BUF_ADDR(bp));
@@ -321,12 +322,13 @@ xlog_recover_iodone(
                 * this during recovery. One strike!
                 */
                xfs_ioerror_alert("xlog_recover_iodone",
-                                  bp->b_mount, bp, XFS_BUF_ADDR(bp));
+                                        bp->b_target->bt_mount, bp,
-                xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
+                                        XFS_BUF_ADDR(bp));
+                xfs_force_shutdown(bp->b_target->bt_mount,
+                                        SHUTDOWN_META_IO_ERROR);
        }
-        bp->b_mount = NULL;
        XFS_BUF_CLR_IODONE_FUNC(bp);
-        xfs_biodone(bp);
+        xfs_buf_ioend(bp, 0);
 }
 /*
@@ -2275,8 +2277,7 @@ xlog_recover_do_buffer_trans(
                XFS_BUF_STALE(bp);
                error = xfs_bwrite(mp, bp);
        } else {
-                ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+                ASSERT(bp->b_target->bt_mount == mp);
-                bp->b_mount = mp;
                XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
                xfs_bdwrite(mp, bp);
        }
@@ -2540,8 +2541,7 @@ xlog_recover_do_inode_trans(
        }
 write_inode_buffer:
-        ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+        ASSERT(bp->b_target->bt_mount == mp);
-        bp->b_mount = mp;
        XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
        xfs_bdwrite(mp, bp);
 error:
@@ -2678,8 +2678,7 @@ xlog_recover_do_dquot_trans(
        memcpy(ddq, recddq, item->ri_buf[1].i_len);
        ASSERT(dq_f->qlf_size == 2);
-        ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+        ASSERT(bp->b_target->bt_mount == mp);
-        bp->b_mount = mp;
        XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
        xfs_bdwrite(mp, bp);
@@ -3817,7 +3816,7 @@ xlog_do_recover(
        XFS_BUF_READ(bp);
        XFS_BUF_UNASYNC(bp);
        xfsbdstrat(log->l_mp, bp);
-        error = xfs_iowait(bp);
+        error = xfs_buf_iowait(bp);
        if (error) {
                xfs_ioerror_alert("xlog_do_recover",
                                  log->l_mp, bp, XFS_BUF_ADDR(bp));
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index aeb9d72ebf6e..b1498ab5a399 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -52,16 +52,11 @@ STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
                                                int);
 STATIC void     xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
                                                int);
-STATIC int      xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
-                                                int64_t, int);
 STATIC void     xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 #else
 #define xfs_icsb_balance_counter(mp, a, b)              do { } while (0)
 #define xfs_icsb_balance_counter_locked(mp, a, b)       do { } while (0)
-#define xfs_icsb_modify_counters(mp, a, b, c)           do { } while (0)
 #endif
 static const struct {
@@ -199,6 +194,8 @@ xfs_uuid_unmount(
 /*
 * Reference counting access wrappers to the perag structures.
+ * Because we never free per-ag structures, the only thing we
+ * have to protect against changes is the tree structure itself.
 */
 struct xfs_perag *
 xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -206,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
        struct xfs_perag        *pag;
        int                     ref = 0;
-        spin_lock(&mp->m_perag_lock);
+        rcu_read_lock();
        pag = radix_tree_lookup(&mp->m_perag_tree, agno);
        if (pag) {
                ASSERT(atomic_read(&pag->pag_ref) >= 0);
-                /* catch leaks in the positive direction during testing */
-                ASSERT(atomic_read(&pag->pag_ref) < 1000);
                ref = atomic_inc_return(&pag->pag_ref);
        }
-        spin_unlock(&mp->m_perag_lock);
+        rcu_read_unlock();
        trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
        return pag;
 }
+/*
+ * Search from @first to find the next perag with the given tag set.
+ */
+struct xfs_perag *
+xfs_perag_get_tag(
+        struct xfs_mount        *mp,
+        xfs_agnumber_t          first,
+        int                     tag)
+{
+        struct xfs_perag        *pag;
+        int                     found;
+        int                     ref;
+        rcu_read_lock();
+        found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                                        (void **)&pag, first, 1, tag);
+        if (found <= 0) {
+                rcu_read_unlock();
+                return NULL;
+        }
+        ref = atomic_inc_return(&pag->pag_ref);
+        rcu_read_unlock();
+        trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
+        return pag;
+}
 void
 xfs_perag_put(struct xfs_perag *pag)
 {
@@ -229,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag)
        trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
 }
+STATIC void
+__xfs_free_perag(
+        struct rcu_head *head)
+{
+        struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+        ASSERT(atomic_read(&pag->pag_ref) == 0);
+        kmem_free(pag);
+}
 /*
- * Free up the resources associated with a mount structure.  Assume that
+ * Free up the per-ag resources associated with the mount structure.
- * the structure was initially zeroed, so we can tell which fields got
- * initialized.
 */
 STATIC void
 xfs_free_perag(
@@ -244,10 +273,9 @@ xfs_free_perag(
        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                spin_lock(&mp->m_perag_lock);
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
-                ASSERT(pag);
-                ASSERT(atomic_read(&pag->pag_ref) == 0);
                spin_unlock(&mp->m_perag_lock);
-                kmem_free(pag);
+                ASSERT(pag);
+                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }
@@ -444,7 +472,10 @@ xfs_initialize_perag(
                pag->pag_agno = index;
                pag->pag_mount = mp;
                rwlock_init(&pag->pag_ici_lock);
+                mutex_init(&pag->pag_ici_reclaim_lock);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+                spin_lock_init(&pag->pag_buf_lock);
+                pag->pag_buf_tree = RB_ROOT;
                if (radix_tree_preload(GFP_NOFS))
                        goto out_unwind;
@@ -639,7 +670,6 @@ int
 xfs_readsb(xfs_mount_t *mp, int flags)
 {
        unsigned int    sector_size;
-        unsigned int    extra_flags;
        xfs_buf_t       *bp;
        int             error;
@@ -652,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags)
         * access to the superblock.
         */
        sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
-        extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
-        bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
+reread:
-                          extra_flags);
+        bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
-        if (!bp || XFS_BUF_ISERROR(bp)) {
+                                        XFS_SB_DADDR, sector_size, 0);
-                xfs_fs_mount_cmn_err(flags, "SB read failed");
+        if (!bp) {
-                error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
+                xfs_fs_mount_cmn_err(flags, "SB buffer read failed");
-                goto fail;
+                return EIO;
        }
-        ASSERT(XFS_BUF_ISBUSY(bp));
-        ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
        /*
         * Initialize the mount structure from the superblock.
         * But first do some basic consistency checking.
         */
        xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
        error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
        if (error) {
                xfs_fs_mount_cmn_err(flags, "SB validate failed");
-                goto fail;
+                goto release_buf;
        }
        /*
@@ -684,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
                        "device supports only %u byte sectors (not %u)",
                        sector_size, mp->m_sb.sb_sectsize);
                error = ENOSYS;
-                goto fail;
+                goto release_buf;
        }
        /*
@@ -692,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags)
         * re-read the superblock so the buffer is correctly sized.
         */
        if (sector_size < mp->m_sb.sb_sectsize) {
-                XFS_BUF_UNMANAGE(bp);
                xfs_buf_relse(bp);
                sector_size = mp->m_sb.sb_sectsize;
-                bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
+                goto reread;
-                                  BTOBB(sector_size), extra_flags);
-                if (!bp || XFS_BUF_ISERROR(bp)) {
-                        xfs_fs_mount_cmn_err(flags, "SB re-read failed");
-                        error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
-                        goto fail;
-                }
-                ASSERT(XFS_BUF_ISBUSY(bp));
-                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
        }
        /* Initialize per-cpu counters */
        xfs_icsb_reinit_counters(mp);
        mp->m_sb_bp = bp;
-        xfs_buf_relse(bp);
+        xfs_buf_unlock(bp);
-        ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
        return 0;
- fail:
+release_buf:
-        if (bp) {
+        xfs_buf_relse(bp);
-                XFS_BUF_UNMANAGE(bp);
-                xfs_buf_relse(bp);
-        }
        return error;
 }
@@ -991,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp)
 {
        xfs_buf_t       *bp;
        xfs_daddr_t     d;
-        int             error;
        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
-                cmn_err(CE_WARN, "XFS: size check 1 failed");
+                cmn_err(CE_WARN, "XFS: filesystem size mismatch detected");
                return XFS_ERROR(EFBIG);
        }
-        error = xfs_read_buf(mp, mp->m_ddev_targp,
+        bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
-                             d - XFS_FSS_TO_BB(mp, 1),
+                                        d - XFS_FSS_TO_BB(mp, 1),
-                             XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                                        BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
-        if (!error) {
+        if (!bp) {
-                xfs_buf_relse(bp);
+                cmn_err(CE_WARN, "XFS: last sector read failed");
-        } else {
+                return EIO;
-                cmn_err(CE_WARN, "XFS: size check 2 failed");
-                if (error == ENOSPC)
-                        error = XFS_ERROR(EFBIG);
-                return error;
        }
+        xfs_buf_relse(bp);
        if (mp->m_logdev_targp != mp->m_ddev_targp) {
                d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
                if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
-                        cmn_err(CE_WARN, "XFS: size check 3 failed");
+                        cmn_err(CE_WARN, "XFS: log size mismatch detected");
                        return XFS_ERROR(EFBIG);
                }
-                error = xfs_read_buf(mp, mp->m_logdev_targp,
+                bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
-                                     d - XFS_FSB_TO_BB(mp, 1),
+                                        d - XFS_FSB_TO_BB(mp, 1),
-                                     XFS_FSB_TO_BB(mp, 1), 0, &bp);
+                                        XFS_FSB_TO_B(mp, 1), 0);
-                if (!error) {
+                if (!bp) {
-                        xfs_buf_relse(bp);
+                        cmn_err(CE_WARN, "XFS: log device read failed");
-                } else {
+                        return EIO;
-                        cmn_err(CE_WARN, "XFS: size check 3 failed");
-                        if (error == ENOSPC)
-                                error = XFS_ERROR(EFBIG);
-                        return error;
                }
+                xfs_buf_relse(bp);
        }
        return 0;
 }
@@ -1601,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                XFS_BUF_UNASYNC(sbp);
                ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
                xfsbdstrat(mp, sbp);
-                error = xfs_iowait(sbp);
+                error = xfs_buf_iowait(sbp);
                if (error)
                        xfs_ioerror_alert("xfs_unmountfs_writesb",
                                          mp, sbp, XFS_BUF_ADDR(sbp));
@@ -1832,135 +1838,72 @@ xfs_mod_incore_sb_unlocked(
 */
 int
 xfs_mod_incore_sb(
-        xfs_mount_t     *mp,
+        struct xfs_mount        *mp,
-        xfs_sb_field_t  field,
+        xfs_sb_field_t          field,
-        int64_t         delta,
+        int64_t                 delta,
-        int             rsvd)
+        int                     rsvd)
 {
-        int     status;
+        int                     status;
-        /* check for per-cpu counters */
-        switch (field) {
 #ifdef HAVE_PERCPU_SB
-        case XFS_SBS_ICOUNT:
+        ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-        case XFS_SBS_IFREE:
-        case XFS_SBS_FDBLOCKS:
-                if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                        status = xfs_icsb_modify_counters(mp, field,
-                                                        delta, rsvd);
-                        break;
-                }
-                /* FALLTHROUGH */
 #endif
-        default:
+        spin_lock(&mp->m_sb_lock);
-                spin_lock(&mp->m_sb_lock);
+        status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-                status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+        spin_unlock(&mp->m_sb_lock);
-                spin_unlock(&mp->m_sb_lock);
-                break;
-        }
        return status;
 }
 /*
- * xfs_mod_incore_sb_batch() is used to change more than one field
+ * Change more than one field in the in-core superblock structure at a time.
- * in the in-core superblock structure at a time.  This modification
- * is protected by a lock internal to this module.  The fields and
- * changes to those fields are specified in the array of xfs_mod_sb
- * structures passed in.
 *
- * Either all of the specified deltas will be applied or none of
+ * The fields and changes to those fields are specified in the array of
- * them will.  If any modified field dips below 0, then all modifications
+ * xfs_mod_sb structures passed in.  Either all of the specified deltas
- * will be backed out and EINVAL will be returned.
+ * will be applied or none of them will.  If any modified field dips below 0,
+ * then all modifications will be backed out and EINVAL will be returned.
+ *
+ * Note that this function may not be used for the superblock values that
+ * are tracked with the in-memory per-cpu counters - a direct call to
+ * xfs_icsb_modify_counters is required for these.
 */
 int
-xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
+xfs_mod_incore_sb_batch(
+        struct xfs_mount        *mp,
+        xfs_mod_sb_t            *msb,
+        uint                    nmsb,
+        int                     rsvd)
 {
-        int             status=0;
+        xfs_mod_sb_t            *msbp = &msb[0];
-        xfs_mod_sb_t    *msbp;
+        int                     error = 0;
        /*
-         * Loop through the array of mod structures and apply each
+         * Loop through the array of mod structures and apply each individually.
-         * individually.  If any fail, then back out all those
+         * If any fail, then back out all those which have already been applied.
-         * which have already been applied.  Do all of this within
+         * Do all of this within the scope of the m_sb_lock so that all of the
-         * the scope of the m_sb_lock so that all of the changes will
+         * changes will be atomic.
-         * be atomic.
         */
        spin_lock(&mp->m_sb_lock);
-        msbp = &msb[0];
        for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
-                /*
+                ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-                 * Apply the delta at index n.  If it fails, break
+                       msbp->msb_field > XFS_SBS_FDBLOCKS);
-                 * from the loop so we'll fall into the undo loop
-                 * below.
-                 */
-                switch (msbp->msb_field) {
-#ifdef HAVE_PERCPU_SB
-                case XFS_SBS_ICOUNT:
-                case XFS_SBS_IFREE:
-                case XFS_SBS_FDBLOCKS:
-                        if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                                spin_unlock(&mp->m_sb_lock);
-                                status = xfs_icsb_modify_counters(mp,
-                                                        msbp->msb_field,
-                                                        msbp->msb_delta, rsvd);
-                                spin_lock(&mp->m_sb_lock);
-                                break;
-                        }
-                        /* FALLTHROUGH */
-#endif
-                default:
-                        status = xfs_mod_incore_sb_unlocked(mp,
-                                                msbp->msb_field,
-                                                msbp->msb_delta, rsvd);
-                        break;
-                }
-                if (status != 0) {
+                error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                        break;
+                                                   msbp->msb_delta, rsvd);
-                }
+                if (error)
+                        goto unwind;
        }
+        spin_unlock(&mp->m_sb_lock);
+        return 0;
-        /*
+unwind:
-         * If we didn't complete the loop above, then back out
+        while (--msbp >= msb) {
-         * any changes made to the superblock.  If you add code
+                error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-         * between the loop above and here, make sure that you
+                                                   -msbp->msb_delta, rsvd);
-         * preserve the value of status. Loop back until
+                ASSERT(error == 0);
-         * we step below the beginning of the array.  Make sure
-         * we don't touch anything back there.
-         */
-        if (status != 0) {
-                msbp--;
-                while (msbp >= msb) {
-                        switch (msbp->msb_field) {
-#ifdef HAVE_PERCPU_SB
-                        case XFS_SBS_ICOUNT:
-                        case XFS_SBS_IFREE:
-                        case XFS_SBS_FDBLOCKS:
-                                if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                                        spin_unlock(&mp->m_sb_lock);
-                                        status = xfs_icsb_modify_counters(mp,
-                                                        msbp->msb_field,
-                                                        -(msbp->msb_delta),
-                                                        rsvd);
-                                        spin_lock(&mp->m_sb_lock);
-                                        break;
-                                }
-                                /* FALLTHROUGH */
-#endif
-                        default:
-                                status = xfs_mod_incore_sb_unlocked(mp,
-                                                        msbp->msb_field,
-                                                        -(msbp->msb_delta),
-                                                        rsvd);
-                                break;
-                        }
-                        ASSERT(status == 0);
-                        msbp--;
-                }
        }
        spin_unlock(&mp->m_sb_lock);
-        return status;
+        return error;
 }
 /*
@@ -1998,18 +1941,13 @@ xfs_getsb(
 */
 void
 xfs_freesb(
-        xfs_mount_t     *mp)
+        struct xfs_mount        *mp)
 {
-        xfs_buf_t       *bp;
+        struct xfs_buf          *bp = mp->m_sb_bp;
-        /*
+        xfs_buf_lock(bp);
-         * Use xfs_getsb() so that the buffer will be locked
-         * when we call xfs_buf_relse().
-         */
-        bp = xfs_getsb(mp, 0);
-        XFS_BUF_UNMANAGE(bp);
-        xfs_buf_relse(bp);
        mp->m_sb_bp = NULL;
+        xfs_buf_relse(bp);
 }
 /*
@@ -2496,7 +2434,7 @@ xfs_icsb_balance_counter(
        spin_unlock(&mp->m_sb_lock);
 }
-STATIC int
+int
 xfs_icsb_modify_counters(
        xfs_mount_t     *mp,
        xfs_sb_field_t  field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 622da2179a57..5861b4980740 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations {
 #include "xfs_sync.h"
-struct cred;
 struct log;
 struct xfs_mount_args;
 struct xfs_inode;
@@ -91,6 +90,8 @@ extern void	xfs_icsb_reinit_counters(struct xfs_mount *);
 extern void     xfs_icsb_destroy_counters(struct xfs_mount *);
 extern void     xfs_icsb_sync_counters(struct xfs_mount *, int);
 extern void     xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
+extern int      xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
+                                                int64_t, int);
 #else
 #define xfs_icsb_init_counters(mp)              (0)
@@ -98,6 +99,8 @@ extern void	xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
 #define xfs_icsb_reinit_counters(mp)            do { } while (0)
 #define xfs_icsb_sync_counters(mp, flags)       do { } while (0)
 #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
+#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
+        xfs_mod_incore_sb(mp, field, delta, rsvd)
 #endif
 typedef struct xfs_mount {
@@ -232,8 +235,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_DIRSYNC       (1ULL << 21)    /* synchronous directory ops */
 #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22)    /* don't report large preferred
                                                 * I/O size in stat() */
-#define XFS_MOUNT_NO_PERCPU_SB  (1ULL << 23)    /* don't use per-cpu superblock
-                                                   counters */
 #define XFS_MOUNT_FILESTREAMS   (1ULL << 24)    /* enable the filestreams
                                                   allocator */
 #define XFS_MOUNT_NOATTR2       (1ULL << 25)    /* disable use of attr2 format */
@@ -327,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 * perag get/put wrappers for ref counting
 */
 struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
+                                        int tag);
 void    xfs_perag_put(struct xfs_perag *pag);
 /*
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h
deleted file mode 100644
index 2dec79edb510..000000000000
--- a/fs/xfs/xfs_refcache.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_REFCACHE_H__
-#define __XFS_REFCACHE_H__
-#ifdef HAVE_REFCACHE
-/*
- * Maximum size (in inodes) for the NFS reference cache
- */
-#define XFS_REFCACHE_SIZE_MAX   512
-struct xfs_inode;
-struct xfs_mount;
-extern void xfs_refcache_insert(struct xfs_inode *);
-extern void xfs_refcache_purge_ip(struct xfs_inode *);
-extern void xfs_refcache_purge_mp(struct xfs_mount *);
-extern void xfs_refcache_purge_some(struct xfs_mount *);
-extern void xfs_refcache_resize(int);
-extern void xfs_refcache_destroy(void);
-extern void xfs_refcache_iunlock(struct xfs_inode *, uint);
-#else
-#define xfs_refcache_insert(ip)         do { } while (0)
-#define xfs_refcache_purge_ip(ip)       do { } while (0)
-#define xfs_refcache_purge_mp(mp)       do { } while (0)
-#define xfs_refcache_purge_some(mp)     do { } while (0)
-#define xfs_refcache_resize(size)       do { } while (0)
-#define xfs_refcache_destroy()          do { } while (0)
-#define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags)
-#endif
-#endif  /* __XFS_REFCACHE_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 8fca957200df..d2af0a8381a6 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -183,7 +183,7 @@ xfs_rename(
         * tree quota mechanism would be circumvented.
         */
        if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-                     (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
+                     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
                error = XFS_ERROR(EXDEV);
                goto error_return;
        }
@@ -211,7 +211,9 @@ xfs_rename(
                        goto error_return;
                if (error)
                        goto abort_return;
-                xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                xfs_trans_ichgtime(tp, target_dp,
+                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
                if (new_parent && src_is_directory) {
                        error = xfs_bumplink(tp, target_dp);
@@ -249,7 +251,9 @@ xfs_rename(
                                        &first_block, &free_list, spaceres);
                if (error)
                        goto abort_return;
-                xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                xfs_trans_ichgtime(tp, target_dp,
+                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
                /*
                 * Decrement the link count on the target since the target
@@ -292,7 +296,7 @@ xfs_rename(
         * inode isn't really being changed, but old unix file systems did
         * it and some incremental backup programs won't work without it.
         */
-        xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
        /*
         * Adjust the link count on src_dp.  This is necessary when
@@ -315,7 +319,7 @@ xfs_rename(
        if (error)
                goto abort_return;
-        xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
        if (new_parent)
                xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 891260fea11e..12a191385310 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -39,6 +39,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_utils.h"
 #include "xfs_trace.h"
+#include "xfs_buf.h"
 /*
@@ -1883,13 +1884,13 @@ xfs_growfs_rt(
        /*
         * Read in the last block of the device, make sure it exists.
         */
-        error = xfs_read_buf(mp, mp->m_rtdev_targp,
+        bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
-                        XFS_FSB_TO_BB(mp, nrblocks - 1),
+                                XFS_FSB_TO_BB(mp, nrblocks - 1),
-                        XFS_FSB_TO_BB(mp, 1), 0, &bp);
+                                XFS_FSB_TO_B(mp, 1), 0);
-        if (error)
+        if (!bp)
-                return error;
+                return EIO;
-        ASSERT(bp);
        xfs_buf_relse(bp);
        /*
         * Calculate new parameters.  These are the final values to be reached.
         */
@@ -2215,7 +2216,6 @@ xfs_rtmount_init(
 {
        xfs_buf_t       *bp;    /* buffer for last block of subvolume */
        xfs_daddr_t     d;      /* address of last block of subvolume */
-        int             error;  /* error return value */
        xfs_sb_t        *sbp;   /* filesystem superblock copy in mount */
        sbp = &mp->m_sb;
@@ -2242,15 +2242,12 @@ xfs_rtmount_init(
                        (unsigned long long) mp->m_sb.sb_rblocks);
                return XFS_ERROR(EFBIG);
        }
-        error = xfs_read_buf(mp, mp->m_rtdev_targp,
+        bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
-                                d - XFS_FSB_TO_BB(mp, 1),
+                                        d - XFS_FSB_TO_BB(mp, 1),
-                                XFS_FSB_TO_BB(mp, 1), 0, &bp);
+                                        XFS_FSB_TO_B(mp, 1), 0);
-        if (error) {
+        if (!bp) {
-                cmn_err(CE_WARN,
+                cmn_err(CE_WARN, "XFS: realtime device size check failed");
-        "XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
+                return EIO;
-                if (error == ENOSPC)
-                        return XFS_ERROR(EFBIG);
-                return error;
        }
        xfs_buf_relse(bp);
        return 0;
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1b017c657494..1eb2ba586814 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -80,10 +80,12 @@ struct xfs_mount;
 #define XFS_SB_VERSION2_RESERVED4BIT    0x00000004
 #define XFS_SB_VERSION2_ATTR2BIT        0x00000008      /* Inline attr rework */
 #define XFS_SB_VERSION2_PARENTBIT       0x00000010      /* parent pointers */
+#define XFS_SB_VERSION2_PROJID32BIT     0x00000080      /* 32 bit project id */
 #define XFS_SB_VERSION2_OKREALFBITS     \
        (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
-         XFS_SB_VERSION2_ATTR2BIT)
+         XFS_SB_VERSION2_ATTR2BIT       | \
+         XFS_SB_VERSION2_PROJID32BIT)
 #define XFS_SB_VERSION2_OKSASHFBITS     \
        (0)
 #define XFS_SB_VERSION2_OKREALBITS      \
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
                sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
 }
+/*
+ * Return 1 when the superblock has the "more bits" version flag and the
+ * 32 bit project id bit set in sb_features2, otherwise 0.
+ */
+static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
+{
+        /* sb_features2 is only consulted when the morebits check passes. */
+        if (!xfs_sb_version_hasmorebits(sbp))
+                return 0;
+        return (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) != 0;
+}
 /*
 * end of superblock version macros
 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1c47edaea0d2..f6d956b7711e 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -696,7 +696,7 @@ xfs_trans_reserve(
         * fail if the count would go below zero.
         */
        if (blocks > 0) {
-                error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
+                error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
                                          -((int64_t)blocks), rsvd);
                if (error != 0) {
                        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -767,7 +767,7 @@ undo_log:
 undo_blocks:
        if (blocks > 0) {
-                (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
+                xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
                                         (int64_t)blocks, rsvd);
                tp->t_blk_res = 0;
        }
@@ -1009,7 +1009,7 @@ void
 xfs_trans_unreserve_and_mod_sb(
        xfs_trans_t     *tp)
 {
-        xfs_mod_sb_t    msb[14];        /* If you add cases, add entries */
+        xfs_mod_sb_t    msb[9]; /* If you add cases, add entries */
        xfs_mod_sb_t    *msbp;
        xfs_mount_t     *mp = tp->t_mountp;
        /* REFERENCED */
@@ -1017,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb(
        int             rsvd;
        int64_t         blkdelta = 0;
        int64_t         rtxdelta = 0;
+        int64_t         idelta = 0;
+        int64_t         ifreedelta = 0;
        msbp = msb;
        rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
-        /* calculate free blocks delta */
+        /* calculate deltas */
        if (tp->t_blk_res > 0)
                blkdelta = tp->t_blk_res;
        if ((tp->t_fdblocks_delta != 0) &&
            (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
             (tp->t_flags & XFS_TRANS_SB_DIRTY)))
                blkdelta += tp->t_fdblocks_delta;
-        if (blkdelta != 0) {
-                msbp->msb_field = XFS_SBS_FDBLOCKS;
-                msbp->msb_delta = blkdelta;
-                msbp++;
-        }
-        /* calculate free realtime extents delta */
        if (tp->t_rtx_res > 0)
                rtxdelta = tp->t_rtx_res;
        if ((tp->t_frextents_delta != 0) &&
            (tp->t_flags & XFS_TRANS_SB_DIRTY))
                rtxdelta += tp->t_frextents_delta;
+        if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+             (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
+                idelta = tp->t_icount_delta;
+                ifreedelta = tp->t_ifree_delta;
+        }
+        /* apply the per-cpu counters */
+        if (blkdelta) {
+                error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                 blkdelta, rsvd);
+                if (error)
+                        goto out;
+        }
+        if (idelta) {
+                error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
+                                                 idelta, rsvd);
+                if (error)
+                        goto out_undo_fdblocks;
+        }
+        if (ifreedelta) {
+                error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
+                                                 ifreedelta, rsvd);
+                if (error)
+                        goto out_undo_icount;
+        }
+        /* apply remaining deltas */
        if (rtxdelta != 0) {
                msbp->msb_field = XFS_SBS_FREXTENTS;
                msbp->msb_delta = rtxdelta;
                msbp++;
        }
-        /* apply remaining deltas */
-        if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
-             (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
-                if (tp->t_icount_delta != 0) {
-                        msbp->msb_field = XFS_SBS_ICOUNT;
-                        msbp->msb_delta = tp->t_icount_delta;
-                        msbp++;
-                }
-                if (tp->t_ifree_delta != 0) {
-                        msbp->msb_field = XFS_SBS_IFREE;
-                        msbp->msb_delta = tp->t_ifree_delta;
-                        msbp++;
-                }
-        }
        if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
                if (tp->t_dblocks_delta != 0) {
                        msbp->msb_field = XFS_SBS_DBLOCKS;
@@ -1115,8 +1121,24 @@ xfs_trans_unreserve_and_mod_sb(
        if (msbp > msb) {
                error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
                        (uint)(msbp - msb), rsvd);
-                ASSERT(error == 0);
+                if (error)
+                        goto out_undo_ifreecount;
        }
+        return;
+        /*
+         * Error unwind: back out the per-cpu counter updates that were
+         * already applied, in the reverse of the order they were made.
+         * Each label undoes one counter and falls through to the next.
+         */
+out_undo_ifreecount:
+        if (ifreedelta)
+                xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+out_undo_icount:
+        if (idelta)
+                xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+out_undo_fdblocks:
+        if (blkdelta)
+                xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+out:
+        /*
+         * Must be a comparison: "error = 0" would overwrite the failure
+         * code instead of checking it.
+         */
+        ASSERT(error == 0);
+        return;
 }
 /*
@@ -1389,15 +1411,12 @@ xfs_trans_item_committed(
 */
 STATIC void
 xfs_trans_committed(
-        struct xfs_trans        *tp,
+        void                    *arg,
        int                     abortflag)
 {
+        struct xfs_trans        *tp = arg;
        struct xfs_log_item_desc *lidp, *next;
-        /* Call the transaction's completion callback if there is one. */
-        if (tp->t_callback != NULL)
-                tp->t_callback(tp, tp->t_callarg);
        list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
                xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
                xfs_trans_free_item_desc(lidp);
@@ -1525,7 +1544,7 @@ xfs_trans_commit_iclog(
         * running in simulation mode (the log is explicitly turned
         * off).
         */
-        tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed;
+        tp->t_logcb.cb_func = xfs_trans_committed;
        tp->t_logcb.cb_arg = tp;
        /*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c13c0f97b494..246286b77a86 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -399,8 +399,6 @@ typedef struct xfs_trans {
                                                 * transaction. */
        struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
        struct xfs_dquot_acct   *t_dqinfo;      /* acctg info for dquots */
-        xfs_trans_callback_t    t_callback;     /* transaction callback */
-        void                    *t_callarg;     /* callback arg */
        unsigned int            t_flags;        /* misc flags */
        int64_t                 t_icount_delta; /* superblock icount change */
        int64_t                 t_ifree_delta;  /* superblock ifree change */
@@ -473,6 +471,7 @@ void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void            xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
 int             xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
                               xfs_ino_t , uint, uint, struct xfs_inode **);
+void            xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
 void            xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
 void            xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
 void            xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 90af025e6839..c47918c302a5 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -336,7 +336,7 @@ xfs_trans_read_buf(
                        ASSERT(!XFS_BUF_ISASYNC(bp));
                        XFS_BUF_READ(bp);
                        xfsbdstrat(tp->t_mountp, bp);
-                        error = xfs_iowait(bp);
+                        error = xfs_buf_iowait(bp);
                        if (error) {
                                xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                                  bp, blkno);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index cdc53a1050c5..ccb34532768b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -118,6 +118,36 @@ xfs_trans_ijoin_ref(
 }
 /*
+ * Transactional inode timestamp update. Requires the inode to be locked and
+ * joined to the transaction supplied. Relies on the transaction subsystem to
+ * track dirty state and update/writeback the inode accordingly.
+ */
+void
+xfs_trans_ichgtime(
+        struct xfs_trans        *tp,
+        struct xfs_inode        *ip,
+        int                     flags)
+{
+        struct inode            *vfs_inode = VFS_I(ip);
+        timespec_t              now;
+        /* The inode must be exclusively locked and joined to tp. */
+        ASSERT(tp);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+        ASSERT(ip->i_transp == tp);
+        /* Sample the time once so mtime and ctime receive the same value. */
+        now = current_fs_time(vfs_inode->i_sb);
+        /* XFS_ICHGTIME_MOD: store mtime only if it differs from now. */
+        if (flags & XFS_ICHGTIME_MOD) {
+                if (!timespec_equal(&vfs_inode->i_mtime, &now))
+                        vfs_inode->i_mtime = now;
+        }
+        /* XFS_ICHGTIME_CHG: store ctime only if it differs from now. */
+        if (flags & XFS_ICHGTIME_CHG) {
+                if (!timespec_equal(&vfs_inode->i_ctime, &now))
+                        vfs_inode->i_ctime = now;
+        }
+}
+/*
 * This is called to mark the fields indicated in fieldmask as needing
 * to be logged when the transaction is committed.  The inode must
 * already be associated with the given transaction.
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 320775295e32..26d1867d8156 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
 typedef __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
 typedef __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
-typedef __uint16_t      xfs_prid_t;     /* prid_t truncated to 16bits in XFS */
 typedef __uint32_t      xlog_tid_t;     /* transaction ID type */
 /*
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index b7d5769d2df0..8b32d1a4c5a1 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -56,7 +56,6 @@ xfs_dir_ialloc(
        mode_t          mode,
        xfs_nlink_t     nlink,
        xfs_dev_t       rdev,
-        cred_t          *credp,
        prid_t          prid,           /* project id */
        int             okalloc,        /* ok to allocate new space */
        xfs_inode_t     **ipp,          /* pointer to inode; it will be
@@ -93,7 +92,7 @@ xfs_dir_ialloc(
         * transaction commit so that no other process can steal
         * the inode(s) that we've just allocated.
         */
-        code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc,
+        code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
                          &ialloc_context, &call_again, &ip);
        /*
@@ -197,7 +196,7 @@ xfs_dir_ialloc(
                 * other allocations in this allocation group,
                 * this call should always succeed.
                 */
-                code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid,
+                code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
                                  okalloc, &ialloc_context, &call_again, &ip);
                /*
@@ -235,7 +234,7 @@ xfs_droplink(
 {
        int     error;
-        xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        ASSERT (ip->i_d.di_nlink > 0);
        ip->i_d.di_nlink--;
@@ -299,7 +298,7 @@ xfs_bumplink(
 {
        if (ip->i_d.di_nlink >= XFS_MAXLINK)
                return XFS_ERROR(EMLINK);
-        xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        ASSERT(ip->i_d.di_nlink > 0);
        ip->i_d.di_nlink++;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f55b9678264f..456fca314933 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -19,8 +19,7 @@
 #define __XFS_UTILS_H__
 extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
-                                xfs_dev_t, cred_t *, prid_t, int,
+                                xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
-                                xfs_inode_t **, int *);
 extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
 extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
 extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4c7c7bfb2b2f..8e4a63c4151a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -114,7 +114,7 @@ xfs_setattr(
                 */
                ASSERT(udqp == NULL);
                ASSERT(gdqp == NULL);
-                code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
+                code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
                                         qflags, &udqp, &gdqp);
                if (code)
                        return code;
@@ -184,8 +184,11 @@ xfs_setattr(
                    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
-                        if (mask & ATTR_CTIME)
+                        if (mask & ATTR_CTIME) {
-                                xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                                inode->i_mtime = inode->i_ctime =
+                                                current_fs_time(inode->i_sb);
+                                xfs_mark_inode_dirty_sync(ip);
+                        }
                        code = 0;
                        goto error_return;
                }
@@ -1253,8 +1256,7 @@ xfs_create(
        struct xfs_name         *name,
        mode_t                  mode,
        xfs_dev_t               rdev,
-        xfs_inode_t             **ipp,
+        xfs_inode_t             **ipp)
-        cred_t                  *credp)
 {
        int                     is_dir = S_ISDIR(mode);
        struct xfs_mount        *mp = dp->i_mount;
@@ -1266,7 +1268,7 @@ xfs_create(
        boolean_t               unlock_dp_on_error = B_FALSE;
        uint                    cancel_flags;
        int                     committed;
-        xfs_prid_t              prid;
+        prid_t                  prid;
        struct xfs_dquot        *udqp = NULL;
        struct xfs_dquot        *gdqp = NULL;
        uint                    resblks;
@@ -1279,9 +1281,9 @@ xfs_create(
                return XFS_ERROR(EIO);
        if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-                prid = dp->i_d.di_projid;
+                prid = xfs_get_projid(dp);
        else
-                prid = dfltprid;
+                prid = XFS_PROJID_DEFAULT;
        /*
         * Make sure that we have allocated dquot(s) on disk.
@@ -1360,7 +1362,7 @@ xfs_create(
         * entry pointing to them, but a directory also the "." entry
         * pointing to itself.
         */
-        error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp,
+        error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
                               prid, resblks > 0, &ip, &committed);
        if (error) {
                if (error == ENOSPC)
@@ -1391,7 +1393,7 @@ xfs_create(
                ASSERT(error != ENOSPC);
                goto out_trans_abort;
        }
-        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
        if (is_dir) {
@@ -1742,7 +1744,7 @@ xfs_remove(
                ASSERT(error != ENOENT);
                goto out_bmap_cancel;
        }
-        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        if (is_dir) {
                /*
@@ -1880,7 +1882,7 @@ xfs_link(
         * the tree quota mechanism could be circumvented.
         */
        if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-                     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
+                     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
                error = XFS_ERROR(EXDEV);
                goto error_return;
        }
@@ -1895,7 +1897,7 @@ xfs_link(
                                        &first_block, &free_list, resblks);
        if (error)
                goto abort_return;
-        xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
        error = xfs_bumplink(tp, sip);
@@ -1933,8 +1935,7 @@ xfs_symlink(
        struct xfs_name         *link_name,
        const char              *target_path,
        mode_t                  mode,
-        xfs_inode_t             **ipp,
+        xfs_inode_t             **ipp)
-        cred_t                  *credp)
 {
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp;
@@ -1955,7 +1956,7 @@ xfs_symlink(
        int                     byte_cnt;
        int                     n;
        xfs_buf_t               *bp;
-        xfs_prid_t              prid;
+        prid_t                  prid;
        struct xfs_dquot        *udqp, *gdqp;
        uint                    resblks;
@@ -1978,9 +1979,9 @@ xfs_symlink(
        udqp = gdqp = NULL;
        if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-                prid = dp->i_d.di_projid;
+                prid = xfs_get_projid(dp);
        else
-                prid = (xfs_prid_t)dfltprid;
+                prid = XFS_PROJID_DEFAULT;
        /*
         * Make sure that we have allocated dquot(s) on disk.
@@ -2046,8 +2047,8 @@ xfs_symlink(
        /*
         * Allocate an inode for the symlink.
         */
-        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT),
+        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-                               1, 0, credp, prid, resblks > 0, &ip, NULL);
+                               prid, resblks > 0, &ip, NULL);
        if (error) {
                if (error == ENOSPC)
                        goto error_return;
@@ -2129,7 +2130,7 @@ xfs_symlink(
                                        &first_block, &free_list, resblks);
        if (error)
                goto error1;
-        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
        /*
@@ -2272,7 +2273,7 @@ xfs_alloc_file_space(
        count = len;
        imapp = &imaps[0];
        nimaps = 1;
-        bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+        bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -2431,9 +2432,9 @@ xfs_zero_remaining_bytes(
        if (endoff > ip->i_size)
                endoff = ip->i_size;
-        bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
+        bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
-                                XFS_IS_REALTIME_INODE(ip) ?
+                                        mp->m_rtdev_targp : mp->m_ddev_targp,
-                                mp->m_rtdev_targp : mp->m_ddev_targp);
+                                mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
        if (!bp)
                return XFS_ERROR(ENOMEM);
@@ -2459,7 +2460,7 @@ xfs_zero_remaining_bytes(
                XFS_BUF_READ(bp);
                XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
                xfsbdstrat(mp, bp);
-                error = xfs_iowait(bp);
+                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
                                          mp, bp, XFS_BUF_ADDR(bp));
@@ -2472,7 +2473,7 @@ xfs_zero_remaining_bytes(
                XFS_BUF_UNREAD(bp);
                XFS_BUF_WRITE(bp);
                xfsbdstrat(mp, bp);
-                error = xfs_iowait(bp);
+                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
                                          mp, bp, XFS_BUF_ADDR(bp));
@@ -2711,6 +2712,7 @@ xfs_change_file_space(
        xfs_off_t       llen;
        xfs_trans_t     *tp;
        struct iattr    iattr;
+        int             prealloc_type;
        if (!S_ISREG(ip->i_d.di_mode))
                return XFS_ERROR(EINVAL);
@@ -2753,12 +2755,17 @@ xfs_change_file_space(
         * size to be changed.
         */
        setprealloc = clrprealloc = 0;
+        prealloc_type = XFS_BMAPI_PREALLOC;
        switch (cmd) {
+        case XFS_IOC_ZERO_RANGE:
+                prealloc_type |= XFS_BMAPI_CONVERT;
+                xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
+                /* FALLTHRU */
        case XFS_IOC_RESVSP:
        case XFS_IOC_RESVSP64:
                error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
-                                                                1, attr_flags);
+                                                prealloc_type, attr_flags);
                if (error)
                        return error;
                setprealloc = 1;
@@ -2827,7 +2834,7 @@ xfs_change_file_space(
                if (ip->i_d.di_mode & S_IXGRP)
                        ip->i_d.di_mode &= ~S_ISGID;
-                xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
        if (setprealloc)
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index d8dfa8d0dadd..f6702927eee4 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,7 +2,6 @@
 #define _XFS_VNODEOPS_H 1
 struct attrlist_cursor_kern;
-struct cred;
 struct file;
 struct iattr;
 struct inode;
@@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                struct xfs_inode **ipp, struct xfs_name *ci_name);
 int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
-                xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp);
+                xfs_dev_t rdev, struct xfs_inode **ipp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                struct xfs_inode *ip);
 int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 int xfs_readdir(struct xfs_inode        *dp, void *dirent, size_t bufsize,
                       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-                const char *target_path, mode_t mode, struct xfs_inode **ipp,
+                const char *target_path, mode_t mode, struct xfs_inode **ipp);
-                cred_t *credp);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
                xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
author	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:32:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:32:27 -0400
commit	5fe3a5ae5c09d53b2b3c7a971e1d87ab3a747055 (patch)
tree	1e0d3e10c83e456a1678c4e01acb5ff624129202
parent	0fc0531e0a2174377a86fd6953ecaa00287d8f70 (diff)
parent	39dc948c6921169e13224a97fa53188922acfde8 (diff)