58 files changed, 2719 insertions, 2572 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..d1db8c17a74e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include <linux/mpage.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
-                struct writeback_control *wbc, void *, int, int);
 #if defined(XFS_RW_TRACE)
 void
@@ -55,17 +54,15 @@ xfs_page_trace(
        int             mask)
 {
        xfs_inode_t     *ip;
-        bhv_desc_t      *bdp;
        vnode_t         *vp = LINVFS_GET_VP(inode);
        loff_t          isize = i_size_read(inode);
-        loff_t          offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+        loff_t          offset = page_offset(page);
        int             delalloc = -1, unmapped = -1, unwritten = -1;
        if (page_has_buffers(page))
                xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-        bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+        ip = xfs_vtoi(vp);
-        ip = XFS_BHVTOI(bdp);
        if (!ip->i_rwtrace)
                return;
@@ -103,15 +100,56 @@ xfs_finish_ioend(
                queue_work(xfsdatad_workqueue, &ioend->io_work);
 }
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ *
+ * The buffer_heads are walked as a singly linked list threaded
+ * through b_private, starting at ioend->io_buffer_head; each one's
+ * b_end_io callback is invoked with ioend->io_uptodate.
+ */
 STATIC void
 xfs_destroy_ioend(
        xfs_ioend_t             *ioend)
 {
+        struct buffer_head      *bh, *next;
+        for (bh = ioend->io_buffer_head; bh; bh = next) {
+                next = bh->b_private;   /* fetch the link before calling b_end_io */
+                bh->b_end_io(bh, ioend->io_uptodate);
+        }
        vn_iowake(ioend->io_vnode);
        mempool_free(ioend, xfs_ioend_pool);    /* ioend must not be touched after this */
 }
 /*
+ * Buffered IO write completion for delayed allocate extents.
+ * TODO: Update ondisk isize now that we know the file data
+ * has been flushed (i.e. the notorious "NULL file" problem).
+ *
+ * @data is the xfs_ioend_t for the completed I/O.  xfs_alloc_ioend()
+ * installs this function as the io_work handler for IOMAP_DELAY
+ * ioends.  At present it only hands the ioend to xfs_destroy_ioend(),
+ * so the ioend must not be used once this returns.
+ */
+STATIC void
+xfs_end_bio_delalloc(
+        void                    *data)
+{
+        xfs_ioend_t             *ioend = data;
+        xfs_destroy_ioend(ioend);
+}
+/*
+ * Buffered IO write completion for regular, written extents.
+ *
+ * @data is the xfs_ioend_t for the completed I/O.  xfs_alloc_ioend()
+ * installs this function as the io_work handler for any ioend type
+ * other than IOMAP_UNWRITTEN and IOMAP_DELAY.  It only hands the
+ * ioend to xfs_destroy_ioend(), so the ioend must not be used once
+ * this returns.
+ */
+STATIC void
+xfs_end_bio_written(
+        void                    *data)
+{
+        xfs_ioend_t             *ioend = data;
+        xfs_destroy_ioend(ioend);
+}
+/*
+ * IO write completion for unwritten extents.
+ *
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
        vnode_t                 *vp = ioend->io_vnode;
        xfs_off_t               offset = ioend->io_offset;
        size_t                  size = ioend->io_size;
-        struct buffer_head      *bh, *next;
        int                     error;
        if (ioend->io_uptodate)
                VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-        /* ioend->io_buffer_head is only non-NULL for buffered I/O */
-        for (bh = ioend->io_buffer_head; bh; bh = next) {
-                next = bh->b_private;
-                bh->b_end_io = NULL;
-                clear_buffer_unwritten(bh);
-                end_buffer_async_write(bh, ioend->io_uptodate);
-        }
        xfs_destroy_ioend(ioend);
 }
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
 */
 STATIC xfs_ioend_t *
 xfs_alloc_ioend(
-        struct inode            *inode)
+        struct inode            *inode,
+        unsigned int            type)
 {
        xfs_ioend_t             *ioend;
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
         */
        atomic_set(&ioend->io_remaining, 1);
        ioend->io_uptodate = 1; /* cleared if any I/O fails */
+        ioend->io_list = NULL;
+        ioend->io_type = type;
        ioend->io_vnode = LINVFS_GET_VP(inode);
        ioend->io_buffer_head = NULL;
+        ioend->io_buffer_tail = NULL;
        atomic_inc(&ioend->io_vnode->v_iocount);
        ioend->io_offset = 0;
        ioend->io_size = 0;
-        INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+        if (type == IOMAP_UNWRITTEN)
+                INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+        else if (type == IOMAP_DELAY)
+                INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+        else
+                INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
        return ioend;
 }
-void
-linvfs_unwritten_done(
-        struct buffer_head      *bh,
-        int                     uptodate)
-{
-        xfs_ioend_t             *ioend = bh->b_private;
-        static spinlock_t       unwritten_done_lock = SPIN_LOCK_UNLOCKED;
-        unsigned long           flags;
-        ASSERT(buffer_unwritten(bh));
-        bh->b_end_io = NULL;
-        if (!uptodate)
-                ioend->io_uptodate = 0;
-        /*
-         * Deep magic here.  We reuse b_private in the buffer_heads to build
-         * a chain for completing the I/O from user context after we've issued
-         * a transaction to convert the unwritten extent.
-         */
-        spin_lock_irqsave(&unwritten_done_lock, flags);
-        bh->b_private = ioend->io_buffer_head;
-        ioend->io_buffer_head = bh;
-        spin_unlock_irqrestore(&unwritten_done_lock, flags);
-        xfs_finish_ioend(ioend);
-}
 STATIC int
 xfs_map_blocks(
        struct inode            *inode,
@@ -218,138 +226,260 @@ xfs_map_blocks(
        return -error;
 }
+STATIC inline int
+xfs_iomap_valid(
+        xfs_iomap_t             *iomapp,
+        loff_t                  offset)
+{       /*
+         * Non-zero when offset falls inside the half-open range
+         * [iomap_offset, iomap_offset + iomap_bsize) described by
+         * iomapp, zero otherwise.
+         */
+        return offset >= iomapp->iomap_offset &&
+                offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+}
 /*
- * Finds the corresponding mapping in block @map array of the
+ * BIO completion handler for buffered IO.
- * given @offset within a @page.
 */
-STATIC xfs_iomap_t *
+STATIC int
-xfs_offset_to_map(
+xfs_end_bio(
+        struct bio              *bio,
+        unsigned int            bytes_done,
+        int                     error)
+{
+        xfs_ioend_t             *ioend = bio->bi_private;
+        if (bio->bi_size)
+                return 1;
+        ASSERT(ioend);
+        ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+        /* Toss bio and pass work off to an xfsdatad thread */
+        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+                ioend->io_uptodate = 0;
+        bio->bi_private = NULL;
+        bio->bi_end_io = NULL;
+        bio_put(bio);
+        xfs_finish_ioend(ioend);
+        return 0;
+}
+STATIC void
+xfs_submit_ioend_bio(
+        xfs_ioend_t     *ioend,
+        struct bio      *bio)
+{       /*
+         * Attach bio to ioend and submit it as a WRITE.  Completion
+         * is routed to xfs_end_bio(), which finds the ioend again
+         * through bio->bi_private.
+         */
+        atomic_inc(&ioend->io_remaining);       /* one count per bio submitted */
+        bio->bi_private = ioend;
+        bio->bi_end_io = xfs_end_bio;
+        submit_bio(WRITE, bio);
+        ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
+        bio_put(bio);   /* presumably pairs with bio_get() in xfs_alloc_ioend_bio() */
+}
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+        struct buffer_head      *bh)
+{       /*
+         * Allocate a bio whose start sector and block device are
+         * taken from bh.  The bio is returned with an extra
+         * reference (bio_get) held for the caller.
+         */
+        struct bio              *bio;
+        int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
+        do {    /* on failure, retry with half as many vecs */
+                bio = bio_alloc(GFP_NOIO, nvecs);
+                nvecs >>= 1;    /* NOTE(review): no lower bound; reaches 0 if allocation keeps failing */
+        } while (!bio);
+        ASSERT(bio->bi_private == NULL);
+        bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);     /* b_size >> 9: buffer size in 512-byte units */
+        bio->bi_bdev = bh->b_bdev;
+        bio_get(bio);
+        return bio;
+}
+STATIC void
+xfs_start_buffer_writeback(
+        struct buffer_head      *bh)
+{       /*
+         * Prepare one buffer for write-out: it must already be
+         * mapped and locked, with the delay and unwritten flags
+         * clear.  It is then marked async-write and uptodate, and
+         * its dirty flag is cleared.
+         */
+        ASSERT(buffer_mapped(bh));
+        ASSERT(buffer_locked(bh));
+        ASSERT(!buffer_delay(bh));
+        ASSERT(!buffer_unwritten(bh));
+        mark_buffer_async_write(bh);
+        set_buffer_uptodate(bh);
+        clear_buffer_dirty(bh);
+}
+STATIC void
+xfs_start_page_writeback(
        struct page             *page,
-        xfs_iomap_t             *iomapp,
+        struct writeback_control *wbc,
-        unsigned long           offset)
+        int                     clear_dirty,
+        int                     buffers)
+{
+        ASSERT(PageLocked(page));
+        ASSERT(!PageWriteback(page));
+        set_page_writeback(page);
+        if (clear_dirty)
+                clear_page_dirty(page);
+        unlock_page(page);
+        if (!buffers) {
+                end_page_writeback(page);
+                wbc->pages_skipped++;   /* We didn't write this page */
+        }
+}
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{       /*
+         * Add bh's data (b_size bytes at bh_offset(bh) within b_page)
+         * to bio and return bio_add_page()'s result unchanged.
+         */
+        return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+/*
+ * Submit all of the bios for all of the ioends we have saved up,
+ * covering the initial writepage page and also any probed pages.
+ *
+ * Walks the chain of ioends linked through io_list.  For each ioend,
+ * its buffer_heads (linked through b_private) are put into writeback
+ * state and packed into bios.  A fresh bio is started whenever the
+ * next buffer's block number is not lastblock + 1, or when
+ * bio_add_buffer() does not accept the whole buffer.  Each ioend is
+ * passed to xfs_finish_ioend() once its bios have been submitted.
+ */
+STATIC void
+xfs_submit_ioend(
+        xfs_ioend_t             *ioend)
+{
+        xfs_ioend_t             *next;
+        struct buffer_head      *bh;
+        struct bio              *bio;
+        sector_t                lastblock = 0;
+        do {
+                next = ioend->io_list;  /* read the link before xfs_finish_ioend() below */
+                bio = NULL;
+                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+                        xfs_start_buffer_writeback(bh);
+                        if (!bio) {
+ retry:
+                                bio = xfs_alloc_ioend_bio(bh);  /* new bio starting at this buffer */
+                        } else if (bh->b_blocknr != lastblock + 1) {    /* not contiguous */
+                                xfs_submit_ioend_bio(ioend, bio);
+                                goto retry;
+                        }
+                        if (bio_add_buffer(bio, bh) != bh->b_size) {    /* buffer not fully accepted */
+                                xfs_submit_ioend_bio(ioend, bio);
+                                goto retry;     /* re-add the same bh to a fresh bio */
+                        }
+                        lastblock = bh->b_blocknr;
+                }
+                if (bio)
+                        xfs_submit_ioend_bio(ioend, bio);       /* flush the final partial bio */
+                xfs_finish_ioend(ioend);
+        } while ((ioend = next) != NULL);
+}
+/*
+ * Cancel submission of all buffer_heads so far in this ioend.
+ * Toss the ioend too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ *
+ * Every ioend on the io_list chain is processed: each of its
+ * buffer_heads (linked through b_private) has its async-write flag
+ * cleared and is unlocked, then vn_iowake() is called on the
+ * ioend's vnode and the ioend is returned to xfs_ioend_pool.
+ */
+STATIC void
+xfs_cancel_ioend(
+        xfs_ioend_t             *ioend)
+{
+        xfs_ioend_t             *next;
+        struct buffer_head      *bh, *next_bh;
+        do {
+                next = ioend->io_list;  /* read the link before the ioend is freed */
+                bh = ioend->io_buffer_head;     /* dereferenced unconditionally below: assumed non-NULL */
+                do {
+                        next_bh = bh->b_private;
+                        clear_buffer_async_write(bh);
+                        unlock_buffer(bh);
+                } while ((bh = next_bh) != NULL);
+                vn_iowake(ioend->io_vnode);
+                mempool_free(ioend, xfs_ioend_pool);
+        } while ((ioend = next) != NULL);
+}
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we chain a new ioend after the current one.
+ * Nothing is returned; the ioend in use is handed back via *result.
+ */
+STATIC void
+xfs_add_to_ioend(
+        struct inode            *inode,
+        struct buffer_head      *bh,
+        xfs_off_t               offset,
+        unsigned int            type,
+        xfs_ioend_t             **result,
+        int                     need_ioend)
 {
-        loff_t                  full_offset;    /* offset from start of file */
+        xfs_ioend_t             *ioend = *result;
-        ASSERT(offset < PAGE_CACHE_SIZE);
+        if (!ioend || need_ioend || type != ioend->io_type) {
+                xfs_ioend_t     *previous = *result;
-        full_offset = page->index;              /* NB: using 64bit number */
+                ioend = xfs_alloc_ioend(inode, type);
-        full_offset <<= PAGE_CACHE_SHIFT;       /* offset from file start */
+                ioend->io_offset = offset;
-        full_offset += offset;                  /* offset from page start */
+                ioend->io_buffer_head = bh;
+                ioend->io_buffer_tail = bh;
+                if (previous)
+                        previous->io_list = ioend;
+                *result = ioend;
+        } else {
+                ioend->io_buffer_tail->b_private = bh;
+                ioend->io_buffer_tail = bh;
+        }
-        if (full_offset < iomapp->iomap_offset)
+        bh->b_private = NULL;
-                return NULL;
+        ioend->io_size += bh->b_size;
-        if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
-                return iomapp;
-        return NULL;
 }
 STATIC void
 xfs_map_at_offset(
-        struct page             *page,
        struct buffer_head      *bh,
-        unsigned long           offset,
+        loff_t                  offset,
        int                     block_bits,
        xfs_iomap_t             *iomapp)
 {
        xfs_daddr_t             bn;
-        loff_t                  delta;
        int                     sector_shift;
        ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
        ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
        ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
-        delta = page->index;
-        delta <<= PAGE_CACHE_SHIFT;
-        delta += offset;
-        delta -= iomapp->iomap_offset;
-        delta >>= block_bits;
        sector_shift = block_bits - BBSHIFT;
-        bn = iomapp->iomap_bn >> sector_shift;
+        bn = (iomapp->iomap_bn >> sector_shift) +
-        bn += delta;
+              ((offset - iomapp->iomap_offset) >> block_bits);
-        BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+        ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
        ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
        lock_buffer(bh);
        bh->b_blocknr = bn;
-        bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+        bh->b_bdev = iomapp->iomap_target->bt_bdev;
        set_buffer_mapped(bh);
        clear_buffer_delay(bh);
+        clear_buffer_unwritten(bh);
 }
 /*
- * Look for a page at index which is unlocked and contains our
+ * Look for a page at index that is suitable for clustering.
- * unwritten extent flagged buffers at its head.  Returns page
- * locked and with an extra reference count, and length of the
- * unwritten extent component on this page that we can write,
- * in units of filesystem blocks.
- */
-STATIC struct page *
-xfs_probe_unwritten_page(
-        struct address_space    *mapping,
-        pgoff_t                 index,
-        xfs_iomap_t             *iomapp,
-        xfs_ioend_t             *ioend,
-        unsigned long           max_offset,
-        unsigned long           *fsbs,
-        unsigned int            bbits)
-{
-        struct page             *page;
-        page = find_trylock_page(mapping, index);
-        if (!page)
-                return NULL;
-        if (PageWriteback(page))
-                goto out;
-        if (page->mapping && page_has_buffers(page)) {
-                struct buffer_head      *bh, *head;
-                unsigned long           p_offset = 0;
-                *fsbs = 0;
-                bh = head = page_buffers(page);
-                do {
-                        if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
-                                break;
-                        if (!xfs_offset_to_map(page, iomapp, p_offset))
-                                break;
-                        if (p_offset >= max_offset)
-                                break;
-                        xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
-                        set_buffer_unwritten_io(bh);
-                        bh->b_private = ioend;
-                        p_offset += bh->b_size;
-                        (*fsbs)++;
-                } while ((bh = bh->b_this_page) != head);
-                if (p_offset)
-                        return page;
-        }
-out:
-        unlock_page(page);
-        return NULL;
-}
-/*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
 */
 STATIC unsigned int
-xfs_probe_unmapped_page(
+xfs_probe_page(
-        struct address_space    *mapping,
+        struct page             *page,
-        pgoff_t                 index,
+        unsigned int            pg_offset,
-        unsigned int            pg_offset)
+        int                     mapped)
 {
-        struct page             *page;
        int                     ret = 0;
-        page = find_trylock_page(mapping, index);
-        if (!page)
-                return 0;
        if (PageWriteback(page))
-                goto out;
+                return 0;
        if (page->mapping && PageDirty(page)) {
                if (page_has_buffers(page)) {
@@ -357,79 +487,101 @@ xfs_probe_unmapped_page(
                        bh = head = page_buffers(page);
                        do {
-                                if (buffer_mapped(bh) || !buffer_uptodate(bh))
+                                if (!buffer_uptodate(bh))
+                                        break;
+                                if (mapped != buffer_mapped(bh))
                                        break;
                                ret += bh->b_size;
                                if (ret >= pg_offset)
                                        break;
                        } while ((bh = bh->b_this_page) != head);
                } else
-                        ret = PAGE_CACHE_SIZE;
+                        ret = mapped ? 0 : PAGE_CACHE_SIZE;
        }
-out:
-        unlock_page(page);
        return ret;
 }
-STATIC unsigned int
+STATIC size_t
-xfs_probe_unmapped_cluster(
+xfs_probe_cluster(
        struct inode            *inode,
        struct page             *startpage,
        struct buffer_head      *bh,
-        struct buffer_head      *head)
+        struct buffer_head      *head,
+        int                     mapped)
 {
+        struct pagevec          pvec;
        pgoff_t                 tindex, tlast, tloff;
-        unsigned int            pg_offset, len, total = 0;
+        size_t                  total = 0;
-        struct address_space    *mapping = inode->i_mapping;
+        int                     done = 0, i;
        /* First sum forwards in this page */
        do {
-                if (buffer_mapped(bh))
+                if (mapped != buffer_mapped(bh))
-                        break;
+                        return total;
                total += bh->b_size;
        } while ((bh = bh->b_this_page) != head);
-        /* If we reached the end of the page, sum forwards in
+        /* if we reached the end of the page, sum forwards in following pages */
-         * following pages.
+        tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-         */
+        tindex = startpage->index + 1;
-        if (bh == head) {
-                tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+        /* Prune this back to avoid pathological behavior */
-                /* Prune this back to avoid pathological behavior */
+        tloff = min(tlast, startpage->index + 64);
-                tloff = min(tlast, startpage->index + 64);
-                for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
+        pagevec_init(&pvec, 0);
-                        len = xfs_probe_unmapped_page(mapping, tindex,
+        while (!done && tindex <= tloff) {
-                                                        PAGE_CACHE_SIZE);
+                unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-                        if (!len)
-                                return total;
+                if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                        break;
+                for (i = 0; i < pagevec_count(&pvec); i++) {
+                        struct page *page = pvec.pages[i];
+                        size_t pg_offset, len = 0;
+                        if (tindex == tlast) {
+                                pg_offset =
+                                    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+                                if (!pg_offset) {
+                                        done = 1;
+                                        break;
+                                }
+                        } else
+                                pg_offset = PAGE_CACHE_SIZE;
+                        if (page->index == tindex && !TestSetPageLocked(page)) {
+                                len = xfs_probe_page(page, pg_offset, mapped);
+                                unlock_page(page);
+                        }
+                        if (!len) {
+                                done = 1;
+                                break;
+                        }
                        total += len;
+                        tindex++;
                }
-                if (tindex == tlast &&
-                    (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                pagevec_release(&pvec);
-                        total += xfs_probe_unmapped_page(mapping,
+                cond_resched();
-                                                        tindex, pg_offset);
-                }
        }
        return total;
 }
 /*
- * Probe for a given page (index) in the inode and test if it is delayed
+ * Test if a given page is suitable for writing as part of an unwritten
- * and without unwritten buffers.  Returns page locked and with an extra
+ * or delayed allocate extent.
- * reference count.
 */
-STATIC struct page *
+STATIC int
-xfs_probe_delalloc_page(
+xfs_is_delayed_page(
-        struct inode            *inode,
+        struct page             *page,
-        pgoff_t                 index)
+        unsigned int            type)
 {
-        struct page             *page;
-        page = find_trylock_page(inode->i_mapping, index);
-        if (!page)
-                return NULL;
        if (PageWriteback(page))
-                goto out;
+                return 0;
        if (page->mapping && page_has_buffers(page)) {
                struct buffer_head      *bh, *head;
@@ -437,243 +589,156 @@ xfs_probe_delalloc_page(
                bh = head = page_buffers(page);
                do {
-                        if (buffer_unwritten(bh)) {
+                        if (buffer_unwritten(bh))
-                                acceptable = 0;
+                                acceptable = (type == IOMAP_UNWRITTEN);
+                        else if (buffer_delay(bh))
+                                acceptable = (type == IOMAP_DELAY);
+                        else if (buffer_mapped(bh))
+                                acceptable = (type == 0);
+                        else
                                break;
-                        } else if (buffer_delay(bh)) {
-                                acceptable = 1;
-                        }
                } while ((bh = bh->b_this_page) != head);
                if (acceptable)
-                        return page;
+                        return 1;
-        }
-out:
-        unlock_page(page);
-        return NULL;
-}
-STATIC int
-xfs_map_unwritten(
-        struct inode            *inode,
-        struct page             *start_page,
-        struct buffer_head      *head,
-        struct buffer_head      *curr,
-        unsigned long           p_offset,
-        int                     block_bits,
-        xfs_iomap_t             *iomapp,
-        struct writeback_control *wbc,
-        int                     startio,
-        int                     all_bh)
-{
-        struct buffer_head      *bh = curr;
-        xfs_iomap_t             *tmp;
-        xfs_ioend_t             *ioend;
-        loff_t                  offset;
-        unsigned long           nblocks = 0;
-        offset = start_page->index;
-        offset <<= PAGE_CACHE_SHIFT;
-        offset += p_offset;
-        ioend = xfs_alloc_ioend(inode);
-        /* First map forwards in the page consecutive buffers
-         * covering this unwritten extent
-         */
-        do {
-                if (!buffer_unwritten(bh))
-                        break;
-                tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
-                if (!tmp)
-                        break;
-                xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
-                set_buffer_unwritten_io(bh);
-                bh->b_private = ioend;
-                p_offset += bh->b_size;
-                nblocks++;
-        } while ((bh = bh->b_this_page) != head);
-        atomic_add(nblocks, &ioend->io_remaining);
-        /* If we reached the end of the page, map forwards in any
-         * following pages which are also covered by this extent.
-         */
-        if (bh == head) {
-                struct address_space    *mapping = inode->i_mapping;
-                pgoff_t                 tindex, tloff, tlast;
-                unsigned long           bs;
-                unsigned int            pg_offset, bbits = inode->i_blkbits;
-                struct page             *page;
-                tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-                tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-                tloff = min(tlast, tloff);
-                for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
-                        page = xfs_probe_unwritten_page(mapping,
-                                                tindex, iomapp, ioend,
-                                                PAGE_CACHE_SIZE, &bs, bbits);
-                        if (!page)
-                                break;
-                        nblocks += bs;
-                        atomic_add(bs, &ioend->io_remaining);
-                        xfs_convert_page(inode, page, iomapp, wbc, ioend,
-                                                        startio, all_bh);
-                        /* stop if converting the next page might add
-                         * enough blocks that the corresponding byte
-                         * count won't fit in our ulong page buf length */
-                        if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-                                goto enough;
-                }
-                if (tindex == tlast &&
-                    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
-                        page = xfs_probe_unwritten_page(mapping,
-                                                        tindex, iomapp, ioend,
-                                                        pg_offset, &bs, bbits);
-                        if (page) {
-                                nblocks += bs;
-                                atomic_add(bs, &ioend->io_remaining);
-                                xfs_convert_page(inode, page, iomapp, wbc, ioend,
-                                                        startio, all_bh);
-                                if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-                                        goto enough;
-                        }
-                }
        }
-enough:
-        ioend->io_size = (xfs_off_t)nblocks << block_bits;
-        ioend->io_offset = offset;
-        xfs_finish_ioend(ioend);
        return 0;
 }
-STATIC void
-xfs_submit_page(
-        struct page             *page,
-        struct writeback_control *wbc,
-        struct buffer_head      *bh_arr[],
-        int                     bh_count,
-        int                     probed_page,
-        int                     clear_dirty)
-{
-        struct buffer_head      *bh;
-        int                     i;
-        BUG_ON(PageWriteback(page));
-        if (bh_count)
-                set_page_writeback(page);
-        if (clear_dirty)
-                clear_page_dirty(page);
-        unlock_page(page);
-        if (bh_count) {
-                for (i = 0; i < bh_count; i++) {
-                        bh = bh_arr[i];
-                        mark_buffer_async_write(bh);
-                        if (buffer_unwritten(bh))
-                                set_buffer_unwritten_io(bh);
-                        set_buffer_uptodate(bh);
-                        clear_buffer_dirty(bh);
-                }
-                for (i = 0; i < bh_count; i++)
-                        submit_bh(WRITE, bh_arr[i]);
-                if (probed_page && clear_dirty)
-                        wbc->nr_to_write--;     /* Wrote an "extra" page */
-        }
-}
 /*
 * Allocate & map buffers for page given the extent map. Write it out.
 * except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only, for the original page it is possible
 * that the page has no mapping at all.
 */
-STATIC void
+STATIC int
 xfs_convert_page(
        struct inode            *inode,
        struct page             *page,
-        xfs_iomap_t             *iomapp,
+        loff_t                  tindex,
+        xfs_iomap_t             *mp,
+        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
-        void                    *private,
        int                     startio,
        int                     all_bh)
 {
-        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+        struct buffer_head      *bh, *head;
-        xfs_iomap_t             *mp = iomapp, *tmp;
+        xfs_off_t               end_offset;
-        unsigned long           offset, end_offset;
+        unsigned long           p_offset;
-        int                     index = 0;
+        unsigned int            type;
        int                     bbits = inode->i_blkbits;
        int                     len, page_dirty;
+        int                     count = 0, done = 0, uptodate = 1;
+        xfs_off_t               offset = page_offset(page);
-        end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+        if (page->index != tindex)
+                goto fail;
+        if (TestSetPageLocked(page))
+                goto fail;
+        if (PageWriteback(page))
+                goto fail_unlock_page;
+        if (page->mapping != inode->i_mapping)
+                goto fail_unlock_page;
+        if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+                goto fail_unlock_page;
        /*
         * page_dirty is initially a count of buffers on the page before
         * EOF and is decrememted as we move each into a cleanable state.
+         *
+         * Derivation:
+         *
+         * End offset is the highest offset that this page should represent.
+         * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+         * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+         * hence give us the correct page_dirty count. On any other page,
+         * it will be zero and in that case we need page_dirty to be the
+         * count of buffers on the page.
         */
+        end_offset = min_t(unsigned long long,
+                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                        i_size_read(inode));
        len = 1 << inode->i_blkbits;
-        end_offset = max(end_offset, PAGE_CACHE_SIZE);
+        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-        end_offset = roundup(end_offset, len);
+                                        PAGE_CACHE_SIZE);
-        page_dirty = end_offset / len;
+        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+        page_dirty = p_offset / len;
-        offset = 0;
        bh = head = page_buffers(page);
        do {
                if (offset >= end_offset)
                        break;
-                if (!(PageUptodate(page) || buffer_uptodate(bh)))
+                if (!buffer_uptodate(bh))
+                        uptodate = 0;
+                if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+                        done = 1;
                        continue;
-                if (buffer_mapped(bh) && all_bh &&
+                }
-                    !(buffer_unwritten(bh) || buffer_delay(bh))) {
+                if (buffer_unwritten(bh) || buffer_delay(bh)) {
+                        if (buffer_unwritten(bh))
+                                type = IOMAP_UNWRITTEN;
+                        else
+                                type = IOMAP_DELAY;
+                        if (!xfs_iomap_valid(mp, offset)) {
+                                done = 1;
+                                continue;
+                        }
+                        ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+                        ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+                        xfs_map_at_offset(bh, offset, bbits, mp);
                        if (startio) {
+                                xfs_add_to_ioend(inode, bh, offset,
+                                                type, ioendp, done);
+                        } else {
+                                set_buffer_dirty(bh);
+                                unlock_buffer(bh);
+                                mark_buffer_dirty(bh);
+                        }
+                        page_dirty--;
+                        count++;
+                } else {
+                        type = 0;
+                        if (buffer_mapped(bh) && all_bh && startio) {
                                lock_buffer(bh);
-                                bh_arr[index++] = bh;
+                                xfs_add_to_ioend(inode, bh, offset,
+                                                type, ioendp, done);
+                                count++;
                                page_dirty--;
+                        } else {
+                                done = 1;
                        }
-                        continue;
                }
-                tmp = xfs_offset_to_map(page, mp, offset);
+        } while (offset += len, (bh = bh->b_this_page) != head);
-                if (!tmp)
-                        continue;
-                ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
-                ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
-                /* If this is a new unwritten extent buffer (i.e. one
+        if (uptodate && bh == head)
-                 * that we haven't passed in private data for, we must
+                SetPageUptodate(page);
-                 * now map this buffer too.
-                 */
+        if (startio) {
-                if (buffer_unwritten(bh) && !bh->b_end_io) {
+                if (count) {
-                        ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
+                        struct backing_dev_info *bdi;
-                        xfs_map_unwritten(inode, page, head, bh, offset,
-                                        bbits, tmp, wbc, startio, all_bh);
+                        bdi = inode->i_mapping->backing_dev_info;
-                } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
+                        if (bdi_write_congested(bdi)) {
-                        xfs_map_at_offset(page, bh, offset, bbits, tmp);
+                                wbc->encountered_congestion = 1;
-                        if (buffer_unwritten(bh)) {
+                                done = 1;
-                                set_buffer_unwritten_io(bh);
+                        } else if (--wbc->nr_to_write <= 0) {
-                                bh->b_private = private;
+                                done = 1;
-                                ASSERT(private);
                        }
                }
-                if (startio) {
+                xfs_start_page_writeback(page, wbc, !page_dirty, count);
-                        bh_arr[index++] = bh;
-                } else {
-                        set_buffer_dirty(bh);
-                        unlock_buffer(bh);
-                        mark_buffer_dirty(bh);
-                }
-                page_dirty--;
-        } while (offset += len, (bh = bh->b_this_page) != head);
-        if (startio && index) {
-                xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
-        } else {
-                unlock_page(page);
        }
+        return done;
+ fail_unlock_page:
+        unlock_page(page);
+ fail:
+        return 1;
 }
 /*
@@ -685,19 +750,31 @@ xfs_cluster_write(
        struct inode            *inode,
        pgoff_t                 tindex,
        xfs_iomap_t             *iomapp,
+        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
        int                     startio,
        int                     all_bh,
        pgoff_t                 tlast)
 {
-        struct page             *page;
+        struct pagevec          pvec;
+        int                     done = 0, i;
-        for (; tindex <= tlast; tindex++) {
+        pagevec_init(&pvec, 0);
-                page = xfs_probe_delalloc_page(inode, tindex);
+        while (!done && tindex <= tlast) {
-                if (!page)
+                unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+                if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
                        break;
-                xfs_convert_page(inode, page, iomapp, wbc, NULL,
-                                startio, all_bh);
+                for (i = 0; i < pagevec_count(&pvec); i++) {
+                        done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                        iomapp, ioendp, wbc, startio, all_bh);
+                        if (done)
+                                break;
+                }
+                pagevec_release(&pvec);
+                cond_resched();
        }
 }
@@ -728,18 +805,22 @@ xfs_page_state_convert(
        int             startio,
        int             unmapped) /* also implies page uptodate */
 {
-        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+        struct buffer_head      *bh, *head;
-        xfs_iomap_t             *iomp, iomap;
+        xfs_iomap_t             iomap;
+        xfs_ioend_t             *ioend = NULL, *iohead = NULL;
        loff_t                  offset;
        unsigned long           p_offset = 0;
+        unsigned int            type;
        __uint64_t              end_offset;
        pgoff_t                 end_index, last_index, tlast;
-        int                     len, err, i, cnt = 0, uptodate = 1;
+        ssize_t                 size, len;
-        int                     flags;
+        int                     flags, err, iomap_valid = 0, uptodate = 1;
-        int                     page_dirty;
+        int                     page_dirty, count = 0, trylock_flag = 0;
+        int                     all_bh = unmapped;
        /* wait for other IO threads? */
-        flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+        if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
+                trylock_flag |= BMAPI_TRYLOCK;
        /* Is this page beyond the end of the file? */
        offset = i_size_read(inode);
@@ -754,161 +835,173 @@ xfs_page_state_convert(
                }
        }
-        end_offset = min_t(unsigned long long,
-                        (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
-        offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        /*
         * page_dirty is initially a count of buffers on the page before
         * EOF and is decrememted as we move each into a cleanable state.
-         */
+         *
+         * Derivation:
+         *
+         * End offset is the highest offset that this page should represent.
+         * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+         * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+         * hence give us the correct page_dirty count. On any other page,
+         * it will be zero and in that case we need page_dirty to be the
+         * count of buffers on the page.
+         */
+        end_offset = min_t(unsigned long long,
+                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
        len = 1 << inode->i_blkbits;
-        p_offset = max(p_offset, PAGE_CACHE_SIZE);
+        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-        p_offset = roundup(p_offset, len);
+                                        PAGE_CACHE_SIZE);
+        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
        page_dirty = p_offset / len;
-        iomp = NULL;
-        p_offset = 0;
        bh = head = page_buffers(page);
+        offset = page_offset(page);
+        flags = -1;
+        type = 0;
+        /* TODO: cleanup count and page_dirty */
        do {
                if (offset >= end_offset)
                        break;
                if (!buffer_uptodate(bh))
                        uptodate = 0;
-                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+                        /*
+                         * the iomap is actually still valid, but the ioend
+                         * isn't.  shouldn't happen too often.
+                         */
+                        iomap_valid = 0;
                        continue;
-                if (iomp) {
-                        iomp = xfs_offset_to_map(page, &iomap, p_offset);
                }
+                if (iomap_valid)
+                        iomap_valid = xfs_iomap_valid(&iomap, offset);
                /*
                 * First case, map an unwritten extent and prepare for
                 * extent state conversion transaction on completion.
-                 */
+                 *
-                if (buffer_unwritten(bh)) {
+                 * Second case, allocate space for a delalloc buffer.
-                        if (!startio)
+                 * We can return EAGAIN here in the release page case.
-                                continue;
+                 *
-                        if (!iomp) {
+                 * Third case, an unmapped buffer was found, and we are
-                                err = xfs_map_blocks(inode, offset, len, &iomap,
+                 * in a path where we need to write the whole page out.
-                                                BMAPI_WRITE|BMAPI_IGNSTATE);
+                 */
-                                if (err) {
+                if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                                        goto error;
+                    ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                                }
+                     !buffer_mapped(bh) && (unmapped || startio))) {
-                                iomp = xfs_offset_to_map(page, &iomap,
+                        /*
-                                                                p_offset);
+                         * Make sure we don't use a read-only iomap
+                         */
+                        if (flags == BMAPI_READ)
+                                iomap_valid = 0;
+                        if (buffer_unwritten(bh)) {
+                                type = IOMAP_UNWRITTEN;
+                                flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+                        } else if (buffer_delay(bh)) {
+                                type = IOMAP_DELAY;
+                                flags = BMAPI_ALLOCATE;
+                                if (!startio)
+                                        flags |= trylock_flag;
+                        } else {
+                                type = IOMAP_NEW;
+                                flags = BMAPI_WRITE|BMAPI_MMAP;
                        }
-                        if (iomp) {
-                                if (!bh->b_end_io) {
+                        if (!iomap_valid) {
-                                        err = xfs_map_unwritten(inode, page,
+                                if (type == IOMAP_NEW) {
-                                                        head, bh, p_offset,
+                                        size = xfs_probe_cluster(inode,
-                                                        inode->i_blkbits, iomp,
+                                                        page, bh, head, 0);
-                                                        wbc, startio, unmapped);
-                                        if (err) {
-                                                goto error;
-                                        }
                                } else {
-                                        set_bit(BH_Lock, &bh->b_state);
+                                        size = len;
                                }
-                                BUG_ON(!buffer_locked(bh));
-                                bh_arr[cnt++] = bh;
+                                err = xfs_map_blocks(inode, offset, size,
-                                page_dirty--;
+                                                &iomap, flags);
-                        }
+                                if (err)
-                /*
-                 * Second case, allocate space for a delalloc buffer.
-                 * We can return EAGAIN here in the release page case.
-                 */
-                } else if (buffer_delay(bh)) {
-                        if (!iomp) {
-                                err = xfs_map_blocks(inode, offset, len, &iomap,
-                                                BMAPI_ALLOCATE | flags);
-                                if (err) {
                                        goto error;
-                                }
+                                iomap_valid = xfs_iomap_valid(&iomap, offset);
-                                iomp = xfs_offset_to_map(page, &iomap,
-                                                                p_offset);
                        }
-                        if (iomp) {
+                        if (iomap_valid) {
-                                xfs_map_at_offset(page, bh, p_offset,
+                                xfs_map_at_offset(bh, offset,
-                                                inode->i_blkbits, iomp);
+                                                inode->i_blkbits, &iomap);
                                if (startio) {
-                                        bh_arr[cnt++] = bh;
+                                        xfs_add_to_ioend(inode, bh, offset,
+                                                        type, &ioend,
+                                                        !iomap_valid);
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
                                        mark_buffer_dirty(bh);
                                }
                                page_dirty--;
+                                count++;
+                        }
+                } else if (buffer_uptodate(bh) && startio) {
+                        /*
+                         * we got here because the buffer is already mapped.
+                         * That means it must already have extents allocated
+                         * underneath it. Map the extent by reading it.
+                         */
+                        if (!iomap_valid || type != 0) {
+                                flags = BMAPI_READ;
+                                size = xfs_probe_cluster(inode, page, bh,
+                                                                head, 1);
+                                err = xfs_map_blocks(inode, offset, size,
+                                                &iomap, flags);
+                                if (err)
+                                        goto error;
+                                iomap_valid = xfs_iomap_valid(&iomap, offset);
                        }
-                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                           (unmapped || startio)) {
-                        if (!buffer_mapped(bh)) {
+                        type = 0;
-                                int     size;
+                        if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+                                ASSERT(buffer_mapped(bh));
-                                /*
+                                if (iomap_valid)
-                                 * Getting here implies an unmapped buffer
+                                        all_bh = 1;
-                                 * was found, and we are in a path where we
+                                xfs_add_to_ioend(inode, bh, offset, type,
-                                 * need to write the whole page out.
+                                                &ioend, !iomap_valid);
-                                 */
+                                page_dirty--;
-                                if (!iomp) {
+                                count++;
-                                        size = xfs_probe_unmapped_cluster(
+                        } else {
-                                                        inode, page, bh, head);
+                                iomap_valid = 0;
-                                        err = xfs_map_blocks(inode, offset,
-                                                        size, &iomap,
-                                                        BMAPI_WRITE|BMAPI_MMAP);
-                                        if (err) {
-                                                goto error;
-                                        }
-                                        iomp = xfs_offset_to_map(page, &iomap,
-                                                                     p_offset);
-                                }
-                                if (iomp) {
-                                        xfs_map_at_offset(page,
-                                                        bh, p_offset,
-                                                        inode->i_blkbits, iomp);
-                                        if (startio) {
-                                                bh_arr[cnt++] = bh;
-                                        } else {
-                                                set_buffer_dirty(bh);
-                                                unlock_buffer(bh);
-                                                mark_buffer_dirty(bh);
-                                        }
-                                        page_dirty--;
-                                }
-                        } else if (startio) {
-                                if (buffer_uptodate(bh) &&
-                                    !test_and_set_bit(BH_Lock, &bh->b_state)) {
-                                        bh_arr[cnt++] = bh;
-                                        page_dirty--;
-                                }
                        }
+                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+                           (unmapped || startio)) {
+                        iomap_valid = 0;
                }
-        } while (offset += len, p_offset += len,
-                ((bh = bh->b_this_page) != head));
+                if (!iohead)
+                        iohead = ioend;
+        } while (offset += len, ((bh = bh->b_this_page) != head));
        if (uptodate && bh == head)
                SetPageUptodate(page);
-        if (startio) {
+        if (startio)
-                xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
+                xfs_start_page_writeback(page, wbc, 1, count);
-        }
-        if (iomp) {
+        if (ioend && iomap_valid) {
-                offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+                offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
                                        PAGE_CACHE_SHIFT;
                tlast = min_t(pgoff_t, offset, last_index);
-                xfs_cluster_write(inode, page->index + 1, iomp, wbc,
+                xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
-                                        startio, unmapped, tlast);
+                                        wbc, startio, all_bh, tlast);
        }
+        if (iohead)
+                xfs_submit_ioend(iohead);
        return page_dirty;
 error:
-        for (i = 0; i < cnt; i++) {
+        if (iohead)
-                unlock_buffer(bh_arr[i]);
+                xfs_cancel_ioend(iohead);
-        }
        /*
         * If it's delalloc and we have nowhere to put it,
@@ -916,9 +1009,8 @@ error:
         * us to try again.
         */
        if (err != -EAGAIN) {
-                if (!unmapped) {
+                if (!unmapped)
                        block_invalidatepage(page, 0);
-                }
                ClearPageUptodate(page);
        }
        return err;
@@ -982,7 +1074,7 @@ __linvfs_get_block(
        }
        /* If this is a realtime file, data might be on a new device */
-        bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+        bh_result->b_bdev = iomap.iomap_target->bt_bdev;
        /* If we previously allocated a block out beyond eof and
         * we are now coming back to use it then we will need to
@@ -1094,10 +1186,10 @@ linvfs_direct_IO(
        if (error)
                return -error;
-        iocb->private = xfs_alloc_ioend(inode);
+        iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
        ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-                iomap.iomap_target->pbr_bdev,
+                iomap.iomap_target->bt_bdev,
                iov, offset, nr_segs,
                linvfs_get_blocks_direct,
                linvfs_end_io_direct);
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4720758a9ade..55339dd5a30d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
 typedef void (*xfs_ioend_func_t)(void *);
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bios at once.
+ */
 typedef struct xfs_ioend {
+        struct xfs_ioend        *io_list;       /* next ioend in chain */
+        unsigned int            io_type;        /* delalloc / unwritten */
        unsigned int            io_uptodate;    /* I/O status register */
        atomic_t                io_remaining;   /* hold count */
        struct vnode            *io_vnode;      /* file being written to */
        struct buffer_head      *io_buffer_head;/* buffer linked list head */
+        struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
        size_t                  io_size;        /* size of the extent */
        xfs_off_t               io_offset;      /* offset in the file */
        struct work_struct      io_work;        /* xfsdatad work queue */
 } xfs_ioend_t;
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..e44b7c1a3a36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
 #include <linux/kthread.h>
 #include "xfs_linux.h"
-STATIC kmem_cache_t *pagebuf_zone;
+STATIC kmem_zone_t *xfs_buf_zone;
-STATIC kmem_shaker_t pagebuf_shake;
+STATIC kmem_shaker_t xfs_buf_shake;
+STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 STATIC struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
 void
-pagebuf_trace(
+xfs_buf_trace(
-        xfs_buf_t       *pb,
+        xfs_buf_t       *bp,
        char            *id,
        void            *data,
        void            *ra)
 {
-        ktrace_enter(pagebuf_trace_buf,
+        ktrace_enter(xfs_buf_trace_buf,
-                pb, id,
+                bp, id,
-                (void *)(unsigned long)pb->pb_flags,
+                (void *)(unsigned long)bp->b_flags,
-                (void *)(unsigned long)pb->pb_hold.counter,
+                (void *)(unsigned long)bp->b_hold.counter,
-                (void *)(unsigned long)pb->pb_sema.count.counter,
+                (void *)(unsigned long)bp->b_sema.count.counter,
                (void *)current,
                data, ra,
-                (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
+                (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
-                (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
+                (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
-                (void *)(unsigned long)pb->pb_buffer_length,
+                (void *)(unsigned long)bp->b_buffer_length,
                NULL, NULL, NULL, NULL, NULL);
 }
-ktrace_t *pagebuf_trace_buf;
+ktrace_t *xfs_buf_trace_buf;
-#define PAGEBUF_TRACE_SIZE      4096
+#define XFS_BUF_TRACE_SIZE      4096
-#define PB_TRACE(pb, id, data)  \
+#define XB_TRACE(bp, id, data)  \
-        pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+        xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
 #else
-#define PB_TRACE(pb, id, data)  do { } while (0)
+#define XB_TRACE(bp, id, data)  do { } while (0)
 #endif
-#ifdef PAGEBUF_LOCK_TRACKING
+#ifdef XFS_BUF_LOCK_TRACKING
-# define PB_SET_OWNER(pb)       ((pb)->pb_last_holder = current->pid)
+# define XB_SET_OWNER(bp)       ((bp)->b_last_holder = current->pid)
-# define PB_CLEAR_OWNER(pb)     ((pb)->pb_last_holder = -1)
+# define XB_CLEAR_OWNER(bp)     ((bp)->b_last_holder = -1)
-# define PB_GET_OWNER(pb)       ((pb)->pb_last_holder)
+# define XB_GET_OWNER(bp)       ((bp)->b_last_holder)
 #else
-# define PB_SET_OWNER(pb)       do { } while (0)
+# define XB_SET_OWNER(bp)       do { } while (0)
-# define PB_CLEAR_OWNER(pb)     do { } while (0)
+# define XB_CLEAR_OWNER(bp)     do { } while (0)
-# define PB_GET_OWNER(pb)       do { } while (0)
+# define XB_GET_OWNER(bp)       do { } while (0)
 #endif
-#define pb_to_gfp(flags) \
+#define xb_to_gfp(flags) \
-        ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
+        ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-          ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+          ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-#define pb_to_km(flags) \
+#define xb_to_km(flags) \
-         (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+         (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-#define pagebuf_allocate(flags) \
+#define xfs_buf_allocate(flags) \
-        kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
+        kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define pagebuf_deallocate(pb) \
+#define xfs_buf_deallocate(bp) \
-        kmem_zone_free(pagebuf_zone, (pb));
+        kmem_zone_free(xfs_buf_zone, (bp));
 /*
- * Page Region interfaces.
+ *      Page Region interfaces.
 *
- * For pages in filesystems where the blocksize is smaller than the
+ *      For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
+ *      pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
+ *      of uptodate regions within the page.
 *
- * Each such region is "bytes per page / bits per long" bytes long.
+ *      Each such region is "bytes per page / bits per long" bytes long.
 *
- * NBPPR == number-of-bytes-per-page-region
+ *      NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
+ *      BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ *      BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
 #if (BITS_PER_LONG == 32)
 #define PRSHIFT         (PAGE_CACHE_SHIFT - 5)  /* (32 == 1<<5) */
@@ -159,7 +160,7 @@ test_page_region(
 }
 /*
- * Mapping of multi-page buffers into contiguous virtual space
+ *      Mapping of multi-page buffers into contiguous virtual space
 */
 typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int		as_list_len;
 STATIC DEFINE_SPINLOCK(as_lock);
 /*
- * Try to batch vunmaps because they are costly.
+ *      Try to batch vunmaps because they are costly.
 */
 STATIC void
 free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
 }
 /*
- *      Internal pagebuf object manipulation
+ *      Internal xfs_buf_t object manipulation
 */
 STATIC void
-_pagebuf_initialize(
+_xfs_buf_initialize(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        xfs_buftarg_t           *target,
-        loff_t                  range_base,
+        xfs_off_t               range_base,
        size_t                  range_length,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        /*
-         * We don't want certain flags to appear in pb->pb_flags.
+         * We don't want certain flags to appear in b_flags.
         */
-        flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
+        flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-        memset(pb, 0, sizeof(xfs_buf_t));
+        memset(bp, 0, sizeof(xfs_buf_t));
-        atomic_set(&pb->pb_hold, 1);
+        atomic_set(&bp->b_hold, 1);
-        init_MUTEX_LOCKED(&pb->pb_iodonesema);
+        init_MUTEX_LOCKED(&bp->b_iodonesema);
-        INIT_LIST_HEAD(&pb->pb_list);
+        INIT_LIST_HEAD(&bp->b_list);
-        INIT_LIST_HEAD(&pb->pb_hash_list);
+        INIT_LIST_HEAD(&bp->b_hash_list);
-        init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
+        init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
-        PB_SET_OWNER(pb);
+        XB_SET_OWNER(bp);
-        pb->pb_target = target;
+        bp->b_target = target;
-        pb->pb_file_offset = range_base;
+        bp->b_file_offset = range_base;
        /*
         * Set buffer_length and count_desired to the same value initially.
         * I/O routines should use count_desired, which will be the same in
         * most cases but may be reset (e.g. XFS recovery).
         */
-        pb->pb_buffer_length = pb->pb_count_desired = range_length;
+        bp->b_buffer_length = bp->b_count_desired = range_length;
-        pb->pb_flags = flags;
+        bp->b_flags = flags;
-        pb->pb_bn = XFS_BUF_DADDR_NULL;
+        bp->b_bn = XFS_BUF_DADDR_NULL;
-        atomic_set(&pb->pb_pin_count, 0);
+        atomic_set(&bp->b_pin_count, 0);
-        init_waitqueue_head(&pb->pb_waiters);
+        init_waitqueue_head(&bp->b_waiters);
-        XFS_STATS_INC(pb_create);
+        XFS_STATS_INC(xb_create);
-        PB_TRACE(pb, "initialize", target);
+        XB_TRACE(bp, "initialize", target);
 }
 /*
- * Allocate a page array capable of holding a specified number
+ *      Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
+ *      of pages, and point the page buf at it.
 */
 STATIC int
-_pagebuf_get_pages(
+_xfs_buf_get_pages(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     page_count,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        /* Make sure that we have a page list */
-        if (pb->pb_pages == NULL) {
+        if (bp->b_pages == NULL) {
-                pb->pb_offset = page_buf_poff(pb->pb_file_offset);
+                bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-                pb->pb_page_count = page_count;
+                bp->b_page_count = page_count;
-                if (page_count <= PB_PAGES) {
+                if (page_count <= XB_PAGES) {
-                        pb->pb_pages = pb->pb_page_array;
+                        bp->b_pages = bp->b_page_array;
                } else {
-                        pb->pb_pages = kmem_alloc(sizeof(struct page *) *
+                        bp->b_pages = kmem_alloc(sizeof(struct page *) *
-                                        page_count, pb_to_km(flags));
+                                        page_count, xb_to_km(flags));
-                        if (pb->pb_pages == NULL)
+                        if (bp->b_pages == NULL)
                                return -ENOMEM;
                }
-                memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+                memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
        }
        return 0;
 }
 /*
- *      Frees pb_pages if it was malloced.
+ *      Frees b_pages if it was allocated.
 */
 STATIC void
-_pagebuf_free_pages(
+_xfs_buf_free_pages(
        xfs_buf_t       *bp)
 {
-        if (bp->pb_pages != bp->pb_page_array) {
+        if (bp->b_pages != bp->b_page_array) {
-                kmem_free(bp->pb_pages,
+                kmem_free(bp->b_pages,
-                          bp->pb_page_count * sizeof(struct page *));
+                          bp->b_page_count * sizeof(struct page *));
        }
 }
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
 *      Releases the specified buffer.
 *
 *      The modification state of any associated pages is left unchanged.
- *      The buffer most not be on any hash - use pagebuf_rele instead for
+ *      The buffer must not be on any hash - use xfs_buf_rele instead for
 *      hashed and refcounted buffers
 */
 void
-pagebuf_free(
+xfs_buf_free(
        xfs_buf_t               *bp)
 {
-        PB_TRACE(bp, "free", 0);
+        XB_TRACE(bp, "free", 0);
-        ASSERT(list_empty(&bp->pb_hash_list));
+        ASSERT(list_empty(&bp->b_hash_list));
-        if (bp->pb_flags & _PBF_PAGE_CACHE) {
+        if (bp->b_flags & _XBF_PAGE_CACHE) {
                uint            i;
-                if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
+                if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
-                        free_address(bp->pb_addr - bp->pb_offset);
+                        free_address(bp->b_addr - bp->b_offset);
-                for (i = 0; i < bp->pb_page_count; i++)
+                for (i = 0; i < bp->b_page_count; i++)
-                        page_cache_release(bp->pb_pages[i]);
+                        page_cache_release(bp->b_pages[i]);
-                _pagebuf_free_pages(bp);
+                _xfs_buf_free_pages(bp);
-        } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+        } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
                 /*
-                  * XXX(hch): bp->pb_count_desired might be incorrect (see
+                  * XXX(hch): bp->b_count_desired might be incorrect (see
-                  * pagebuf_associate_memory for details), but fortunately
+                  * xfs_buf_associate_memory for details), but fortunately
                  * the Linux version of kmem_free ignores the len argument..
                  */
-                kmem_free(bp->pb_addr, bp->pb_count_desired);
+                kmem_free(bp->b_addr, bp->b_count_desired);
-                _pagebuf_free_pages(bp);
+                _xfs_buf_free_pages(bp);
        }
-        pagebuf_deallocate(bp);
+        xfs_buf_deallocate(bp);
 }
 /*
 *      Finds all pages for buffer in question and builds it's page list.
 */
 STATIC int
-_pagebuf_lookup_pages(
+_xfs_buf_lookup_pages(
        xfs_buf_t               *bp,
        uint                    flags)
 {
-        struct address_space    *mapping = bp->pb_target->pbr_mapping;
+        struct address_space    *mapping = bp->b_target->bt_mapping;
-        size_t                  blocksize = bp->pb_target->pbr_bsize;
+        size_t                  blocksize = bp->b_target->bt_bsize;
-        size_t                  size = bp->pb_count_desired;
+        size_t                  size = bp->b_count_desired;
        size_t                  nbytes, offset;
-        gfp_t                   gfp_mask = pb_to_gfp(flags);
+        gfp_t                   gfp_mask = xb_to_gfp(flags);
        unsigned short          page_count, i;
        pgoff_t                 first;
-        loff_t                  end;
+        xfs_off_t               end;
        int                     error;
-        end = bp->pb_file_offset + bp->pb_buffer_length;
+        end = bp->b_file_offset + bp->b_buffer_length;
-        page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+        page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-        error = _pagebuf_get_pages(bp, page_count, flags);
+        error = _xfs_buf_get_pages(bp, page_count, flags);
        if (unlikely(error))
                return error;
-        bp->pb_flags |= _PBF_PAGE_CACHE;
+        bp->b_flags |= _XBF_PAGE_CACHE;
-        offset = bp->pb_offset;
+        offset = bp->b_offset;
-        first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+        first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
-        for (i = 0; i < bp->pb_page_count; i++) {
+        for (i = 0; i < bp->b_page_count; i++) {
                struct page     *page;
                uint            retries = 0;
              retry:
                page = find_or_create_page(mapping, first + i, gfp_mask);
                if (unlikely(page == NULL)) {
-                        if (flags & PBF_READ_AHEAD) {
+                        if (flags & XBF_READ_AHEAD) {
-                                bp->pb_page_count = i;
+                                bp->b_page_count = i;
-                                for (i = 0; i < bp->pb_page_count; i++)
+                                for (i = 0; i < bp->b_page_count; i++)
-                                        unlock_page(bp->pb_pages[i]);
+                                        unlock_page(bp->b_pages[i]);
                                return -ENOMEM;
                        }
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
                                        "deadlock in %s (mode:0x%x)\n",
                                        __FUNCTION__, gfp_mask);
-                        XFS_STATS_INC(pb_page_retries);
+                        XFS_STATS_INC(xb_page_retries);
                        xfsbufd_wakeup(0, gfp_mask);
                        blk_congestion_wait(WRITE, HZ/50);
                        goto retry;
                }
-                XFS_STATS_INC(pb_page_found);
+                XFS_STATS_INC(xb_page_found);
                nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
                size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
                if (!PageUptodate(page)) {
                        page_count--;
                        if (blocksize >= PAGE_CACHE_SIZE) {
-                                if (flags & PBF_READ)
+                                if (flags & XBF_READ)
-                                        bp->pb_locked = 1;
+                                        bp->b_locked = 1;
                        } else if (!PagePrivate(page)) {
                                if (test_page_region(page, offset, nbytes))
                                        page_count++;
                        }
                }
-                bp->pb_pages[i] = page;
+                bp->b_pages[i] = page;
                offset = 0;
        }
-        if (!bp->pb_locked) {
+        if (!bp->b_locked) {
-                for (i = 0; i < bp->pb_page_count; i++)
+                for (i = 0; i < bp->b_page_count; i++)
-                        unlock_page(bp->pb_pages[i]);
+                        unlock_page(bp->b_pages[i]);
        }
-        if (page_count == bp->pb_page_count)
+        if (page_count == bp->b_page_count)
-                bp->pb_flags |= PBF_DONE;
+                bp->b_flags |= XBF_DONE;
-        PB_TRACE(bp, "lookup_pages", (long)page_count);
+        XB_TRACE(bp, "lookup_pages", (long)page_count);
        return error;
 }
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
 *      Map buffer into kernel address-space if nessecary.
 */
 STATIC int
-_pagebuf_map_pages(
+_xfs_buf_map_pages(
        xfs_buf_t               *bp,
        uint                    flags)
 {
        /* A single page buffer is always mappable */
-        if (bp->pb_page_count == 1) {
+        if (bp->b_page_count == 1) {
-                bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
+                bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-                bp->pb_flags |= PBF_MAPPED;
+                bp->b_flags |= XBF_MAPPED;
-        } else if (flags & PBF_MAPPED) {
+        } else if (flags & XBF_MAPPED) {
                if (as_list_len > 64)
                        purge_addresses();
-                bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
+                bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
-                                VM_MAP, PAGE_KERNEL);
+                                        VM_MAP, PAGE_KERNEL);
-                if (unlikely(bp->pb_addr == NULL))
+                if (unlikely(bp->b_addr == NULL))
                        return -ENOMEM;
-                bp->pb_addr += bp->pb_offset;
+                bp->b_addr += bp->b_offset;
-                bp->pb_flags |= PBF_MAPPED;
+                bp->b_flags |= XBF_MAPPED;
        }
        return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
 */
 /*
- *      _pagebuf_find
+ *      Looks up, and creates if absent, a lockable buffer for
- *
- *      Looks up, and creates if absent, a lockable buffer for
 *      a given range of an inode.  The buffer is returned
 *      locked.  If other overlapping buffers exist, they are
 *      released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
 *      are unlocked.  No I/O is implied by this call.
 */
 xfs_buf_t *
-_pagebuf_find(
+_xfs_buf_find(
        xfs_buftarg_t           *btp,   /* block device target          */
-        loff_t                  ioff,   /* starting offset of range     */
+        xfs_off_t               ioff,   /* starting offset of range     */
        size_t                  isize,  /* length of range              */
-        page_buf_flags_t        flags,  /* PBF_TRYLOCK                  */
+        xfs_buf_flags_t         flags,
-        xfs_buf_t               *new_pb)/* newly allocated buffer       */
+        xfs_buf_t               *new_bp)
 {
-        loff_t                  range_base;
+        xfs_off_t               range_base;
        size_t                  range_length;
        xfs_bufhash_t           *hash;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
        range_base = (ioff << BBSHIFT);
        range_length = (isize << BBSHIFT);
        /* Check for IOs smaller than the sector size / not sector aligned */
-        ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+        ASSERT(!(range_length < (1 << btp->bt_sshift)));
-        ASSERT(!(range_base & (loff_t)btp->pbr_smask));
+        ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
        hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
        spin_lock(&hash->bh_lock);
-        list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+        list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                ASSERT(btp == pb->pb_target);
+                ASSERT(btp == bp->b_target);
-                if (pb->pb_file_offset == range_base &&
+                if (bp->b_file_offset == range_base &&
-                    pb->pb_buffer_length == range_length) {
+                    bp->b_buffer_length == range_length) {
                        /*
-                         * If we look at something bring it to the
+                         * If we look at something, bring it to the
                         * front of the list for next time.
                         */
-                        atomic_inc(&pb->pb_hold);
+                        atomic_inc(&bp->b_hold);
-                        list_move(&pb->pb_hash_list, &hash->bh_list);
+                        list_move(&bp->b_hash_list, &hash->bh_list);
                        goto found;
                }
        }
        /* No match found */
-        if (new_pb) {
+        if (new_bp) {
-                _pagebuf_initialize(new_pb, btp, range_base,
+                _xfs_buf_initialize(new_bp, btp, range_base,
                                range_length, flags);
-                new_pb->pb_hash = hash;
+                new_bp->b_hash = hash;
-                list_add(&new_pb->pb_hash_list, &hash->bh_list);
+                list_add(&new_bp->b_hash_list, &hash->bh_list);
        } else {
-                XFS_STATS_INC(pb_miss_locked);
+                XFS_STATS_INC(xb_miss_locked);
        }
        spin_unlock(&hash->bh_lock);
-        return new_pb;
+        return new_bp;
 found:
        spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
         * if this does not work then we need to drop the
         * spinlock and do a hard attempt on the semaphore.
         */
-        if (down_trylock(&pb->pb_sema)) {
+        if (down_trylock(&bp->b_sema)) {
-                if (!(flags & PBF_TRYLOCK)) {
+                if (!(flags & XBF_TRYLOCK)) {
                        /* wait for buffer ownership */
-                        PB_TRACE(pb, "get_lock", 0);
+                        XB_TRACE(bp, "get_lock", 0);
-                        pagebuf_lock(pb);
+                        xfs_buf_lock(bp);
-                        XFS_STATS_INC(pb_get_locked_waited);
+                        XFS_STATS_INC(xb_get_locked_waited);
                } else {
                        /* We asked for a trylock and failed, no need
                         * to look at file offset and length here, we
-                         * know that this pagebuf at least overlaps our
+                         * know that this buffer at least overlaps our
-                         * pagebuf and is locked, therefore our buffer
+                         * buffer and is locked, therefore our buffer
-                         * either does not exist, or is this buffer
+                         * either does not exist, or is this buffer.
                         */
+                        xfs_buf_rele(bp);
-                        pagebuf_rele(pb);
+                        XFS_STATS_INC(xb_busy_locked);
-                        XFS_STATS_INC(pb_busy_locked);
+                        return NULL;
-                        return (NULL);
                }
        } else {
                /* trylock worked */
-                PB_SET_OWNER(pb);
+                XB_SET_OWNER(bp);
        }
-        if (pb->pb_flags & PBF_STALE) {
+        if (bp->b_flags & XBF_STALE) {
-                ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
+                ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-                pb->pb_flags &= PBF_MAPPED;
+                bp->b_flags &= XBF_MAPPED;
        }
-        PB_TRACE(pb, "got_lock", 0);
+        XB_TRACE(bp, "got_lock", 0);
-        XFS_STATS_INC(pb_get_locked);
+        XFS_STATS_INC(xb_get_locked);
-        return (pb);
+        return bp;
 }
 /*
- *      xfs_buf_get_flags assembles a buffer covering the specified range.
+ *      Assembles a buffer covering the specified range.
- *
 *      Storage in memory for all portions of the buffer will be allocated,
 *      although backing storage may not be.
 */
 xfs_buf_t *
-xfs_buf_get_flags(                      /* allocate a buffer            */
+xfs_buf_get_flags(
        xfs_buftarg_t           *target,/* target for buffer            */
-        loff_t                  ioff,   /* starting offset of range     */
+        xfs_off_t               ioff,   /* starting offset of range     */
        size_t                  isize,  /* length of range              */
-        page_buf_flags_t        flags)  /* PBF_TRYLOCK                  */
+        xfs_buf_flags_t         flags)
 {
-        xfs_buf_t               *pb, *new_pb;
+        xfs_buf_t               *bp, *new_bp;
        int                     error = 0, i;
-        new_pb = pagebuf_allocate(flags);
+        new_bp = xfs_buf_allocate(flags);
-        if (unlikely(!new_pb))
+        if (unlikely(!new_bp))
                return NULL;
-        pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
+        bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-        if (pb == new_pb) {
+        if (bp == new_bp) {
-                error = _pagebuf_lookup_pages(pb, flags);
+                error = _xfs_buf_lookup_pages(bp, flags);
                if (error)
                        goto no_buffer;
        } else {
-                pagebuf_deallocate(new_pb);
+                xfs_buf_deallocate(new_bp);
-                if (unlikely(pb == NULL))
+                if (unlikely(bp == NULL))
                        return NULL;
        }
-        for (i = 0; i < pb->pb_page_count; i++)
+        for (i = 0; i < bp->b_page_count; i++)
-                mark_page_accessed(pb->pb_pages[i]);
+                mark_page_accessed(bp->b_pages[i]);
-        if (!(pb->pb_flags & PBF_MAPPED)) {
+        if (!(bp->b_flags & XBF_MAPPED)) {
-                error = _pagebuf_map_pages(pb, flags);
+                error = _xfs_buf_map_pages(bp, flags);
                if (unlikely(error)) {
                        printk(KERN_WARNING "%s: failed to map pages\n",
                                        __FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags(			/* allocate a buffer		*/
                }
        }
-        XFS_STATS_INC(pb_get);
+        XFS_STATS_INC(xb_get);
        /*
         * Always fill in the block number now, the mapped cases can do
         * their own overlay of this later.
         */
-        pb->pb_bn = ioff;
+        bp->b_bn = ioff;
-        pb->pb_count_desired = pb->pb_buffer_length;
+        bp->b_count_desired = bp->b_buffer_length;
-        PB_TRACE(pb, "get", (unsigned long)flags);
+        XB_TRACE(bp, "get", (unsigned long)flags);
-        return pb;
+        return bp;
 no_buffer:
-        if (flags & (PBF_LOCK | PBF_TRYLOCK))
+        if (flags & (XBF_LOCK | XBF_TRYLOCK))
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return NULL;
 }
 xfs_buf_t *
 xfs_buf_read_flags(
        xfs_buftarg_t           *target,
-        loff_t                  ioff,
+        xfs_off_t               ioff,
        size_t                  isize,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
-        xfs_buf_t               *pb;
+        xfs_buf_t               *bp;
-        flags |= PBF_READ;
+        flags |= XBF_READ;
-        pb = xfs_buf_get_flags(target, ioff, isize, flags);
+        bp = xfs_buf_get_flags(target, ioff, isize, flags);
-        if (pb) {
+        if (bp) {
-                if (!XFS_BUF_ISDONE(pb)) {
+                if (!XFS_BUF_ISDONE(bp)) {
-                        PB_TRACE(pb, "read", (unsigned long)flags);
+                        XB_TRACE(bp, "read", (unsigned long)flags);
-                        XFS_STATS_INC(pb_get_read);
+                        XFS_STATS_INC(xb_get_read);
-                        pagebuf_iostart(pb, flags);
+                        xfs_buf_iostart(bp, flags);
-                } else if (flags & PBF_ASYNC) {
+                } else if (flags & XBF_ASYNC) {
-                        PB_TRACE(pb, "read_async", (unsigned long)flags);
+                        XB_TRACE(bp, "read_async", (unsigned long)flags);
                        /*
                         * Read ahead call which is already satisfied,
                         * drop the buffer
                         */
                        goto no_buffer;
                } else {
-                        PB_TRACE(pb, "read_done", (unsigned long)flags);
+                        XB_TRACE(bp, "read_done", (unsigned long)flags);
                        /* We do not want read in the flags */
-                        pb->pb_flags &= ~PBF_READ;
+                        bp->b_flags &= ~XBF_READ;
                }
        }
-        return pb;
+        return bp;
 no_buffer:
-        if (flags & (PBF_LOCK | PBF_TRYLOCK))
+        if (flags & (XBF_LOCK | XBF_TRYLOCK))
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return NULL;
 }
 /*
- * If we are not low on memory then do the readahead in a deadlock
+ *      If we are not low on memory then do the readahead in a deadlock
- * safe manner.
+ *      safe manner.
 */
 void
-pagebuf_readahead(
+xfs_buf_readahead(
        xfs_buftarg_t           *target,
-        loff_t                  ioff,
+        xfs_off_t               ioff,
        size_t                  isize,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        struct backing_dev_info *bdi;
-        bdi = target->pbr_mapping->backing_dev_info;
+        bdi = target->bt_mapping->backing_dev_info;
        if (bdi_read_congested(bdi))
                return;
-        flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
+        flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
        xfs_buf_read_flags(target, ioff, isize, flags);
 }
 xfs_buf_t *
-pagebuf_get_empty(
+xfs_buf_get_empty(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
-        xfs_buf_t               *pb;
+        xfs_buf_t               *bp;
-        pb = pagebuf_allocate(0);
+        bp = xfs_buf_allocate(0);
-        if (pb)
+        if (bp)
-                _pagebuf_initialize(pb, target, 0, len, 0);
+                _xfs_buf_initialize(bp, target, 0, len, 0);
-        return pb;
+        return bp;
 }
 static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
 }
 int
-pagebuf_associate_memory(
+xfs_buf_associate_memory(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        void                    *mem,
        size_t                  len)
 {
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
                page_count++;
        /* Free any previous set of page pointers */
-        if (pb->pb_pages)
+        if (bp->b_pages)
-                _pagebuf_free_pages(pb);
+                _xfs_buf_free_pages(bp);
-        pb->pb_pages = NULL;
+        bp->b_pages = NULL;
-        pb->pb_addr = mem;
+        bp->b_addr = mem;
-        rval = _pagebuf_get_pages(pb, page_count, 0);
+        rval = _xfs_buf_get_pages(bp, page_count, 0);
        if (rval)
                return rval;
-        pb->pb_offset = offset;
+        bp->b_offset = offset;
        ptr = (size_t) mem & PAGE_CACHE_MASK;
        end = PAGE_CACHE_ALIGN((size_t) mem + len);
        end_cur = end;
        /* set up first page */
-        pb->pb_pages[0] = mem_to_page(mem);
+        bp->b_pages[0] = mem_to_page(mem);
        ptr += PAGE_CACHE_SIZE;
-        pb->pb_page_count = ++i;
+        bp->b_page_count = ++i;
        while (ptr < end) {
-                pb->pb_pages[i] = mem_to_page((void *)ptr);
+                bp->b_pages[i] = mem_to_page((void *)ptr);
-                pb->pb_page_count = ++i;
+                bp->b_page_count = ++i;
                ptr += PAGE_CACHE_SIZE;
        }
-        pb->pb_locked = 0;
+        bp->b_locked = 0;
-        pb->pb_count_desired = pb->pb_buffer_length = len;
+        bp->b_count_desired = bp->b_buffer_length = len;
-        pb->pb_flags |= PBF_MAPPED;
+        bp->b_flags |= XBF_MAPPED;
        return 0;
 }
 xfs_buf_t *
-pagebuf_get_no_daddr(
+xfs_buf_get_noaddr(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
        void                    *data;
        int                     error;
-        bp = pagebuf_allocate(0);
+        bp = xfs_buf_allocate(0);
        if (unlikely(bp == NULL))
                goto fail;
-        _pagebuf_initialize(bp, target, 0, len, 0);
+        _xfs_buf_initialize(bp, target, 0, len, 0);
 try_again:
        data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
        /* check whether alignment matches.. */
        if ((__psunsigned_t)data !=
-            ((__psunsigned_t)data & ~target->pbr_smask)) {
+            ((__psunsigned_t)data & ~target->bt_smask)) {
                /* .. else double the size and try again */
                kmem_free(data, malloc_len);
                malloc_len <<= 1;
                goto try_again;
        }
-        error = pagebuf_associate_memory(bp, data, len);
+        error = xfs_buf_associate_memory(bp, data, len);
        if (error)
                goto fail_free_mem;
-        bp->pb_flags |= _PBF_KMEM_ALLOC;
+        bp->b_flags |= _XBF_KMEM_ALLOC;
-        pagebuf_unlock(bp);
+        xfs_buf_unlock(bp);
-        PB_TRACE(bp, "no_daddr", data);
+        XB_TRACE(bp, "no_daddr", data);
        return bp;
 fail_free_mem:
        kmem_free(data, malloc_len);
 fail_free_buf:
-        pagebuf_free(bp);
+        xfs_buf_free(bp);
 fail:
        return NULL;
 }
 /*
- *      pagebuf_hold
- *
 *      Increment reference count on buffer, to hold the buffer concurrently
 *      with another thread which may release (free) the buffer asynchronously.
- *
 *      Must hold the buffer already to call this function.
 */
 void
-pagebuf_hold(
+xfs_buf_hold(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        atomic_inc(&pb->pb_hold);
+        atomic_inc(&bp->b_hold);
-        PB_TRACE(pb, "hold", 0);
+        XB_TRACE(bp, "hold", 0);
 }
 /*
- *      pagebuf_rele
+ *      Releases a hold on the specified buffer.  If the
- *
+ *      hold count is 1, calls xfs_buf_free.
- *      pagebuf_rele releases a hold on the specified buffer.  If the
- *      the hold count is 1, pagebuf_rele calls pagebuf_free.
 */
 void
-pagebuf_rele(
+xfs_buf_rele(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        xfs_bufhash_t           *hash = pb->pb_hash;
+        xfs_bufhash_t           *hash = bp->b_hash;
-        PB_TRACE(pb, "rele", pb->pb_relse);
+        XB_TRACE(bp, "rele", bp->b_relse);
-        if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
+        if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
-                if (pb->pb_relse) {
+                if (bp->b_relse) {
-                        atomic_inc(&pb->pb_hold);
+                        atomic_inc(&bp->b_hold);
                        spin_unlock(&hash->bh_lock);
-                        (*(pb->pb_relse)) (pb);
+                        (*(bp->b_relse)) (bp);
-                } else if (pb->pb_flags & PBF_FS_MANAGED) {
+                } else if (bp->b_flags & XBF_FS_MANAGED) {
                        spin_unlock(&hash->bh_lock);
                } else {
-                        ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
+                        ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                        list_del_init(&pb->pb_hash_list);
+                        list_del_init(&bp->b_hash_list);
                        spin_unlock(&hash->bh_lock);
-                        pagebuf_free(pb);
+                        xfs_buf_free(bp);
                }
        } else {
                /*
                 * Catch reference count leaks
                 */
-                ASSERT(atomic_read(&pb->pb_hold) >= 0);
+                ASSERT(atomic_read(&bp->b_hold) >= 0);
        }
 }
@@ -863,168 +855,122 @@ pagebuf_rele(
 */
 /*
- *      pagebuf_cond_lock
+ *      Locks a buffer object, if it is not already locked.
- *
+ *      Note that this in no way locks the underlying pages, so it is only
- *      pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ *      useful for synchronizing concurrent use of buffer objects, not for
- *      Note that this in no way
+ *      synchronizing independent access to the underlying pages.
- *      locks the underlying pages, so it is only useful for synchronizing
- *      concurrent use of page buffer objects, not for synchronizing independent
- *      access to the underlying pages.
 */
 int
-pagebuf_cond_lock(                      /* lock buffer, if not locked   */
+xfs_buf_cond_lock(
-                                        /* returns -EBUSY if locked)    */
+        xfs_buf_t               *bp)
-        xfs_buf_t               *pb)
 {
        int                     locked;
-        locked = down_trylock(&pb->pb_sema) == 0;
+        locked = down_trylock(&bp->b_sema) == 0;
        if (locked) {
-                PB_SET_OWNER(pb);
+                XB_SET_OWNER(bp);
        }
-        PB_TRACE(pb, "cond_lock", (long)locked);
+        XB_TRACE(bp, "cond_lock", (long)locked);
-        return(locked ? 0 : -EBUSY);
+        return locked ? 0 : -EBUSY;
 }
 #if defined(DEBUG) || defined(XFS_BLI_TRACE)
-/*
- *      pagebuf_lock_value
- *
- *      Return lock value for a pagebuf
- */
 int
-pagebuf_lock_value(
+xfs_buf_lock_value(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        return(atomic_read(&pb->pb_sema.count));
+        return atomic_read(&bp->b_sema.count);
 }
 #endif
 /*
- *      pagebuf_lock
+ *      Locks a buffer object.
- *
+ *      Note that this in no way locks the underlying pages, so it is only
- *      pagebuf_lock locks a buffer object.  Note that this in no way
+ *      useful for synchronizing concurrent use of buffer objects, not for
- *      locks the underlying pages, so it is only useful for synchronizing
+ *      synchronizing independent access to the underlying pages.
- *      concurrent use of page buffer objects, not for synchronizing independent
- *      access to the underlying pages.
 */
-int
+void
-pagebuf_lock(
+xfs_buf_lock(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "lock", 0);
+        XB_TRACE(bp, "lock", 0);
-        if (atomic_read(&pb->pb_io_remaining))
+        if (atomic_read(&bp->b_io_remaining))
-                blk_run_address_space(pb->pb_target->pbr_mapping);
+                blk_run_address_space(bp->b_target->bt_mapping);
-        down(&pb->pb_sema);
+        down(&bp->b_sema);
-        PB_SET_OWNER(pb);
+        XB_SET_OWNER(bp);
-        PB_TRACE(pb, "locked", 0);
+        XB_TRACE(bp, "locked", 0);
-        return 0;
 }
 /*
- *      pagebuf_unlock
+ *      Releases the lock on the buffer object.
- *
- *      pagebuf_unlock releases the lock on the buffer object created by
- *      pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
- *      created by pagebuf_pin).
- *
 *      If the buffer is marked delwri but is not queued, do so before we
- *      unlock the buffer as we need to set flags correctly. We also need to
+ *      unlock the buffer as we need to set flags correctly.  We also need to
 *      take a reference for the delwri queue because the unlocker is going to
 *      drop their's and they don't know we just queued it.
 */
 void
-pagebuf_unlock(                         /* unlock buffer                */
+xfs_buf_unlock(
-        xfs_buf_t               *pb)    /* buffer to unlock             */
+        xfs_buf_t               *bp)
 {
-        if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
+        if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-                atomic_inc(&pb->pb_hold);
+                atomic_inc(&bp->b_hold);
-                pb->pb_flags |= PBF_ASYNC;
+                bp->b_flags |= XBF_ASYNC;
-                pagebuf_delwri_queue(pb, 0);
+                xfs_buf_delwri_queue(bp, 0);
        }
-        PB_CLEAR_OWNER(pb);
+        XB_CLEAR_OWNER(bp);
-        up(&pb->pb_sema);
+        up(&bp->b_sema);
-        PB_TRACE(pb, "unlock", 0);
+        XB_TRACE(bp, "unlock", 0);
 }
 /*
 *      Pinning Buffer Storage in Memory
- */
+ *      Ensure that no attempt to force a buffer to disk will succeed.
-/*
- *      pagebuf_pin
- *
- *      pagebuf_pin locks all of the memory represented by a buffer in
- *      memory.  Multiple calls to pagebuf_pin and pagebuf_unpin, for
- *      the same or different buffers affecting a given page, will
- *      properly count the number of outstanding "pin" requests.  The
- *      buffer may be released after the pagebuf_pin and a different
- *      buffer used when calling pagebuf_unpin, if desired.
- *      pagebuf_pin should be used by the file system when it wants be
- *      assured that no attempt will be made to force the affected
- *      memory to disk.  It does not assure that a given logical page
- *      will not be moved to a different physical page.
 */
 void
-pagebuf_pin(
+xfs_buf_pin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        atomic_inc(&pb->pb_pin_count);
+        atomic_inc(&bp->b_pin_count);
-        PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+        XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
 }
-/*
- *      pagebuf_unpin
- *
- *      pagebuf_unpin reverses the locking of memory performed by
- *      pagebuf_pin.  Note that both functions affected the logical
- *      pages associated with the buffer, not the buffer itself.
- */
 void
-pagebuf_unpin(
+xfs_buf_unpin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        if (atomic_dec_and_test(&pb->pb_pin_count)) {
+        if (atomic_dec_and_test(&bp->b_pin_count))
-                wake_up_all(&pb->pb_waiters);
+                wake_up_all(&bp->b_waiters);
-        }
+        XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
-        PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
 }
 int
-pagebuf_ispin(
+xfs_buf_ispin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        return atomic_read(&pb->pb_pin_count);
+        return atomic_read(&bp->b_pin_count);
 }
-/*
+STATIC void
- *      pagebuf_wait_unpin
+xfs_buf_wait_unpin(
- *
+        xfs_buf_t               *bp)
- *      pagebuf_wait_unpin waits until all of the memory associated
- *      with the buffer is not longer locked in memory.  It returns
- *      immediately if none of the affected pages are locked.
- */
-static inline void
-_pagebuf_wait_unpin(
-        xfs_buf_t               *pb)
 {
        DECLARE_WAITQUEUE       (wait, current);
-        if (atomic_read(&pb->pb_pin_count) == 0)
+        if (atomic_read(&bp->b_pin_count) == 0)
                return;
-        add_wait_queue(&pb->pb_waiters, &wait);
+        add_wait_queue(&bp->b_waiters, &wait);
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
-                if (atomic_read(&pb->pb_pin_count) == 0)
+                if (atomic_read(&bp->b_pin_count) == 0)
                        break;
-                if (atomic_read(&pb->pb_io_remaining))
+                if (atomic_read(&bp->b_io_remaining))
-                        blk_run_address_space(pb->pb_target->pbr_mapping);
+                        blk_run_address_space(bp->b_target->bt_mapping);
                schedule();
        }
-        remove_wait_queue(&pb->pb_waiters, &wait);
+        remove_wait_queue(&bp->b_waiters, &wait);
        set_current_state(TASK_RUNNING);
 }
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
 *      Buffer Utility Routines
 */
-/*
- *      pagebuf_iodone
- *
- *      pagebuf_iodone marks a buffer for which I/O is in progress
- *      done with respect to that I/O.  The pb_iodone routine, if
- *      present, will be called as a side-effect.
- */
 STATIC void
-pagebuf_iodone_work(
+xfs_buf_iodone_work(
        void                    *v)
 {
        xfs_buf_t               *bp = (xfs_buf_t *)v;
-        if (bp->pb_iodone)
+        if (bp->b_iodone)
-                (*(bp->pb_iodone))(bp);
+                (*(bp->b_iodone))(bp);
-        else if (bp->pb_flags & PBF_ASYNC)
+        else if (bp->b_flags & XBF_ASYNC)
                xfs_buf_relse(bp);
 }
 void
-pagebuf_iodone(
+xfs_buf_ioend(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     schedule)
 {
-        pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
+        bp->b_flags &= ~(XBF_READ | XBF_WRITE);
-        if (pb->pb_error == 0)
+        if (bp->b_error == 0)
-                pb->pb_flags |= PBF_DONE;
+                bp->b_flags |= XBF_DONE;
-        PB_TRACE(pb, "iodone", pb->pb_iodone);
+        XB_TRACE(bp, "iodone", bp->b_iodone);
-        if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+        if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
                if (schedule) {
-                        INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
+                        INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
-                        queue_work(xfslogd_workqueue, &pb->pb_iodone_work);
+                        queue_work(xfslogd_workqueue, &bp->b_iodone_work);
                } else {
-                        pagebuf_iodone_work(pb);
+                        xfs_buf_iodone_work(bp);
                }
        } else {
-                up(&pb->pb_iodonesema);
+                up(&bp->b_iodonesema);
        }
 }
-/*
- *      pagebuf_ioerror
- *
- *      pagebuf_ioerror sets the error code for a buffer.
- */
 void
-pagebuf_ioerror(                        /* mark/clear buffer error flag */
+xfs_buf_ioerror(
-        xfs_buf_t               *pb,    /* buffer to mark               */
+        xfs_buf_t               *bp,
-        int                     error)  /* error to store (0 if none)   */
+        int                     error)
 {
        ASSERT(error >= 0 && error <= 0xffff);
-        pb->pb_error = (unsigned short)error;
+        bp->b_error = (unsigned short)error;
-        PB_TRACE(pb, "ioerror", (unsigned long)error);
+        XB_TRACE(bp, "ioerror", (unsigned long)error);
 }
 /*
- *      pagebuf_iostart
+ *      Initiate I/O on a buffer, based on the flags supplied.
- *
+ *      The b_iodone routine in the buffer supplied will only be called
- *      pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
- *      If necessary, it will arrange for any disk space allocation required,
- *      and it will break up the request if the block mappings require it.
- *      The pb_iodone routine in the buffer supplied will only be called
 *      when all of the subsidiary I/O requests, if any, have been completed.
- *      pagebuf_iostart calls the pagebuf_ioinitiate routine or
- *      pagebuf_iorequest, if the former routine is not defined, to start
- *      the I/O on a given low-level request.
 */
 int
-pagebuf_iostart(                        /* start I/O on a buffer          */
+xfs_buf_iostart(
-        xfs_buf_t               *pb,    /* buffer to start                */
+        xfs_buf_t               *bp,
-        page_buf_flags_t        flags)  /* PBF_LOCK, PBF_ASYNC, PBF_READ, */
+        xfs_buf_flags_t         flags)
-                                        /* PBF_WRITE, PBF_DELWRI,         */
-                                        /* PBF_DONT_BLOCK                 */
 {
        int                     status = 0;
-        PB_TRACE(pb, "iostart", (unsigned long)flags);
+        XB_TRACE(bp, "iostart", (unsigned long)flags);
-        if (flags & PBF_DELWRI) {
+        if (flags & XBF_DELWRI) {
-                pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
+                bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
-                pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
+                bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
-                pagebuf_delwri_queue(pb, 1);
+                xfs_buf_delwri_queue(bp, 1);
                return status;
        }
-        pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
+        bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
-                        PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-        pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
+        bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
-                        PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-        BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);
+        BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
        /* For writes allow an alternate strategy routine to precede
         * the actual I/O request (which may not be issued at all in
         * a shutdown situation, for example).
         */
-        status = (flags & PBF_WRITE) ?
+        status = (flags & XBF_WRITE) ?
-                pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
+                xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
        /* Wait for I/O if we are not an async request.
         * Note: async I/O request completion will release the buffer,
         * and that can already be done by this point.  So using the
         * buffer pointer from here on, after async I/O, is invalid.
         */
-        if (!status && !(flags & PBF_ASYNC))
+        if (!status && !(flags & XBF_ASYNC))
-                status = pagebuf_iowait(pb);
+                status = xfs_buf_iowait(bp);
        return status;
 }
-/*
- * Helper routine for pagebuf_iorequest
- */
 STATIC __inline__ int
-_pagebuf_iolocked(
+_xfs_buf_iolocked(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
+        ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
-        if (pb->pb_flags & PBF_READ)
+        if (bp->b_flags & XBF_READ)
-                return pb->pb_locked;
+                return bp->b_locked;
        return 0;
 }
 STATIC __inline__ void
-_pagebuf_iodone(
+_xfs_buf_ioend(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     schedule)
 {
-        if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-                pb->pb_locked = 0;
+                bp->b_locked = 0;
-                pagebuf_iodone(pb, schedule);
+                xfs_buf_ioend(bp, schedule);
        }
 }
 STATIC int
-bio_end_io_pagebuf(
+xfs_buf_bio_end_io(
        struct bio              *bio,
        unsigned int            bytes_done,
        int                     error)
 {
-        xfs_buf_t               *pb = (xfs_buf_t *)bio->bi_private;
+        xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-        unsigned int            blocksize = pb->pb_target->pbr_bsize;
+        unsigned int            blocksize = bp->b_target->bt_bsize;
        struct bio_vec          *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        if (bio->bi_size)
                return 1;
        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                pb->pb_error = EIO;
+                bp->b_error = EIO;
        do {
                struct page     *page = bvec->bv_page;
-                if (unlikely(pb->pb_error)) {
+                if (unlikely(bp->b_error)) {
-                        if (pb->pb_flags & PBF_READ)
+                        if (bp->b_flags & XBF_READ)
                                ClearPageUptodate(page);
                        SetPageError(page);
-                } else if (blocksize == PAGE_CACHE_SIZE) {
+                } else if (blocksize >= PAGE_CACHE_SIZE) {
                        SetPageUptodate(page);
                } else if (!PagePrivate(page) &&
-                                (pb->pb_flags & _PBF_PAGE_CACHE)) {
+                                (bp->b_flags & _XBF_PAGE_CACHE)) {
                        set_page_region(page, bvec->bv_offset, bvec->bv_len);
                }
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
-                if (_pagebuf_iolocked(pb)) {
+                if (_xfs_buf_iolocked(bp)) {
                        unlock_page(page);
                }
        } while (bvec >= bio->bi_io_vec);
-        _pagebuf_iodone(pb, 1);
+        _xfs_buf_ioend(bp, 1);
        bio_put(bio);
        return 0;
 }
 STATIC void
-_pagebuf_ioapply(
+_xfs_buf_ioapply(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
        int                     i, rw, map_i, total_nr_pages, nr_pages;
        struct bio              *bio;
-        int                     offset = pb->pb_offset;
+        int                     offset = bp->b_offset;
-        int                     size = pb->pb_count_desired;
+        int                     size = bp->b_count_desired;
-        sector_t                sector = pb->pb_bn;
+        sector_t                sector = bp->b_bn;
-        unsigned int            blocksize = pb->pb_target->pbr_bsize;
+        unsigned int            blocksize = bp->b_target->bt_bsize;
-        int                     locking = _pagebuf_iolocked(pb);
+        int                     locking = _xfs_buf_iolocked(bp);
-        total_nr_pages = pb->pb_page_count;
+        total_nr_pages = bp->b_page_count;
        map_i = 0;
-        if (pb->pb_flags & _PBF_RUN_QUEUES) {
+        if (bp->b_flags & _XBF_RUN_QUEUES) {
-                pb->pb_flags &= ~_PBF_RUN_QUEUES;
+                bp->b_flags &= ~_XBF_RUN_QUEUES;
-                rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
+                rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
        } else {
-                rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
+                rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
        }
-        if (pb->pb_flags & PBF_ORDERED) {
+        if (bp->b_flags & XBF_ORDERED) {
-                ASSERT(!(pb->pb_flags & PBF_READ));
+                ASSERT(!(bp->b_flags & XBF_READ));
                rw = WRITE_BARRIER;
        }
-        /* Special code path for reading a sub page size pagebuf in --
+        /* Special code path for reading a sub page size buffer in --
         * we populate up the whole page, and hence the other metadata
         * in the same page.  This optimization is only valid when the
-         * filesystem block size and the page size are equal.
+         * filesystem block size is not smaller than the page size.
         */
-        if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
+        if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-            (pb->pb_flags & PBF_READ) && locking &&
+            (bp->b_flags & XBF_READ) && locking &&
-            (blocksize == PAGE_CACHE_SIZE)) {
+            (blocksize >= PAGE_CACHE_SIZE)) {
                bio = bio_alloc(GFP_NOIO, 1);
-                bio->bi_bdev = pb->pb_target->pbr_bdev;
+                bio->bi_bdev = bp->b_target->bt_bdev;
                bio->bi_sector = sector - (offset >> BBSHIFT);
-                bio->bi_end_io = bio_end_io_pagebuf;
+                bio->bi_end_io = xfs_buf_bio_end_io;
-                bio->bi_private = pb;
+                bio->bi_private = bp;
-                bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
+                bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
                size = 0;
-                atomic_inc(&pb->pb_io_remaining);
+                atomic_inc(&bp->b_io_remaining);
                goto submit_io;
        }
        /* Lock down the pages which we need to for the request */
-        if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
+        if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
                for (i = 0; size; i++) {
                        int             nbytes = PAGE_CACHE_SIZE - offset;
-                        struct page     *page = pb->pb_pages[i];
+                        struct page     *page = bp->b_pages[i];
                        if (nbytes > size)
                                nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
                        size -= nbytes;
                        offset = 0;
                }
-                offset = pb->pb_offset;
+                offset = bp->b_offset;
-                size = pb->pb_count_desired;
+                size = bp->b_count_desired;
        }
 next_chunk:
-        atomic_inc(&pb->pb_io_remaining);
+        atomic_inc(&bp->b_io_remaining);
        nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
        if (nr_pages > total_nr_pages)
                nr_pages = total_nr_pages;
        bio = bio_alloc(GFP_NOIO, nr_pages);
-        bio->bi_bdev = pb->pb_target->pbr_bdev;
+        bio->bi_bdev = bp->b_target->bt_bdev;
        bio->bi_sector = sector;
-        bio->bi_end_io = bio_end_io_pagebuf;
+        bio->bi_end_io = xfs_buf_bio_end_io;
-        bio->bi_private = pb;
+        bio->bi_private = bp;
        for (; size && nr_pages; nr_pages--, map_i++) {
-                int     nbytes = PAGE_CACHE_SIZE - offset;
+                int     rbytes, nbytes = PAGE_CACHE_SIZE - offset;
                if (nbytes > size)
                        nbytes = size;
-                if (bio_add_page(bio, pb->pb_pages[map_i],
+                rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-                                        nbytes, offset) < nbytes)
+                if (rbytes < nbytes)
                        break;
                offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
                        goto next_chunk;
        } else {
                bio_put(bio);
-                pagebuf_ioerror(pb, EIO);
+                xfs_buf_ioerror(bp, EIO);
        }
 }
-/*
- *      pagebuf_iorequest -- the core I/O request routine.
- */
 int
-pagebuf_iorequest(                      /* start real I/O               */
+xfs_buf_iorequest(
-        xfs_buf_t               *pb)    /* buffer to convey to device   */
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "iorequest", 0);
+        XB_TRACE(bp, "iorequest", 0);
-        if (pb->pb_flags & PBF_DELWRI) {
+        if (bp->b_flags & XBF_DELWRI) {
-                pagebuf_delwri_queue(pb, 1);
+                xfs_buf_delwri_queue(bp, 1);
                return 0;
        }
-        if (pb->pb_flags & PBF_WRITE) {
+        if (bp->b_flags & XBF_WRITE) {
-                _pagebuf_wait_unpin(pb);
+                xfs_buf_wait_unpin(bp);
        }
-        pagebuf_hold(pb);
+        xfs_buf_hold(bp);
        /* Set the count to 1 initially, this will stop an I/O
         * completion callout which happens before we have started
-         * all the I/O from calling pagebuf_iodone too early.
+         * all the I/O from calling xfs_buf_ioend too early.
         */
-        atomic_set(&pb->pb_io_remaining, 1);
+        atomic_set(&bp->b_io_remaining, 1);
-        _pagebuf_ioapply(pb);
+        _xfs_buf_ioapply(bp);
-        _pagebuf_iodone(pb, 0);
+        _xfs_buf_ioend(bp, 0);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return 0;
 }
 /*
- *      pagebuf_iowait
+ *      Waits for I/O to complete on the buffer supplied.
- *
+ *      It returns immediately if no I/O is pending.
- *      pagebuf_iowait waits for I/O to complete on the buffer supplied.
+ *      It returns the I/O error code, if any, or 0 if there was no error.
- *      It returns immediately if no I/O is pending.  In any case, it returns
- *      the error code, if any, or 0 if there is no error.
 */
 int
-pagebuf_iowait(
+xfs_buf_iowait(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "iowait", 0);
+        XB_TRACE(bp, "iowait", 0);
-        if (atomic_read(&pb->pb_io_remaining))
+        if (atomic_read(&bp->b_io_remaining))
-                blk_run_address_space(pb->pb_target->pbr_mapping);
+                blk_run_address_space(bp->b_target->bt_mapping);
-        down(&pb->pb_iodonesema);
+        down(&bp->b_iodonesema);
-        PB_TRACE(pb, "iowaited", (long)pb->pb_error);
+        XB_TRACE(bp, "iowaited", (long)bp->b_error);
-        return pb->pb_error;
+        return bp->b_error;
 }
-caddr_t
+xfs_caddr_t
-pagebuf_offset(
+xfs_buf_offset(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        size_t                  offset)
 {
        struct page             *page;
-        offset += pb->pb_offset;
+        if (bp->b_flags & XBF_MAPPED)
+                return XFS_BUF_PTR(bp) + offset;
-        page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
+        offset += bp->b_offset;
-        return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
+        page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
+        return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
 }
 /*
- *      pagebuf_iomove
- *
 *      Move data into or out of a buffer.
 */
 void
-pagebuf_iomove(
+xfs_buf_iomove(
-        xfs_buf_t               *pb,    /* buffer to process            */
+        xfs_buf_t               *bp,    /* buffer to process            */
        size_t                  boff,   /* starting buffer offset       */
        size_t                  bsize,  /* length to copy               */
        caddr_t                 data,   /* data address                 */
-        page_buf_rw_t           mode)   /* read/write flag              */
+        xfs_buf_rw_t            mode)   /* read/write/zero flag         */
 {
        size_t                  bend, cpoff, csize;
        struct page             *page;
        bend = boff + bsize;
        while (boff < bend) {
-                page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
+                page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-                cpoff = page_buf_poff(boff + pb->pb_offset);
+                cpoff = xfs_buf_poff(boff + bp->b_offset);
                csize = min_t(size_t,
-                              PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
+                              PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
                ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
                switch (mode) {
-                case PBRW_ZERO:
+                case XBRW_ZERO:
                        memset(page_address(page) + cpoff, 0, csize);
                        break;
-                case PBRW_READ:
+                case XBRW_READ:
                        memcpy(data, page_address(page) + cpoff, csize);
                        break;
-                case PBRW_WRITE:
+                case XBRW_WRITE:
                        memcpy(page_address(page) + cpoff, data, csize);
                }
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
 }
 /*
- *      Handling of buftargs.
+ *      Handling of buffer targets (buftargs).
 */
 /*
- * Wait for any bufs with callbacks that have been submitted but
+ *      Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ *      have not yet returned... walk the hash list for the target.
 */
 void
 xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
                hash = &btp->bt_hash[i];
 again:
                spin_lock(&hash->bh_lock);
-                list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
+                list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                        ASSERT(btp == bp->pb_target);
+                        ASSERT(btp == bp->b_target);
-                        if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+                        if (!(bp->b_flags & XBF_FS_MANAGED)) {
                                spin_unlock(&hash->bh_lock);
                                /*
                                 * Catch superblock reference count leaks
                                 * immediately
                                 */
-                                BUG_ON(bp->pb_bn == 0);
+                                BUG_ON(bp->b_bn == 0);
                                delay(100);
                                goto again;
                        }
@@ -1462,9 +1378,9 @@ again:
 }
 /*
- * Allocate buffer hash table for a given target.
+ *      Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
+ *      For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
+ *      we need to allocate a much larger hash table.
 */
 STATIC void
 xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
 xfs_free_bufhash(
        xfs_buftarg_t           *btp)
 {
-        kmem_free(btp->bt_hash,
+        kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
-                        (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
        btp->bt_hash = NULL;
 }
+/*
+ *      buftarg list for delwrite queue processing
+ */
+STATIC LIST_HEAD(xfs_buftarg_list);
+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+STATIC void
+xfs_register_buftarg(
+        xfs_buftarg_t           *btp)
+{
+        spin_lock(&xfs_buftarg_lock);
+        list_add(&btp->bt_list, &xfs_buftarg_list);
+        spin_unlock(&xfs_buftarg_lock);
+}
+STATIC void
+xfs_unregister_buftarg(
+        xfs_buftarg_t           *btp)
+{
+        spin_lock(&xfs_buftarg_lock);
+        list_del(&btp->bt_list);
+        spin_unlock(&xfs_buftarg_lock);
+}
 void
 xfs_free_buftarg(
        xfs_buftarg_t           *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
 {
        xfs_flush_buftarg(btp, 1);
        if (external)
-                xfs_blkdev_put(btp->pbr_bdev);
+                xfs_blkdev_put(btp->bt_bdev);
        xfs_free_bufhash(btp);
-        iput(btp->pbr_mapping->host);
+        iput(btp->bt_mapping->host);
+        /* Unregister the buftarg first so that we don't get a
+         * wakeup finding a non-existent task
+         */
+        xfs_unregister_buftarg(btp);
+        kthread_stop(btp->bt_task);
        kmem_free(btp, sizeof(*btp));
 }
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
        unsigned int            sectorsize,
        int                     verbose)
 {
-        btp->pbr_bsize = blocksize;
+        btp->bt_bsize = blocksize;
-        btp->pbr_sshift = ffs(sectorsize) - 1;
+        btp->bt_sshift = ffs(sectorsize) - 1;
-        btp->pbr_smask = sectorsize - 1;
+        btp->bt_smask = sectorsize - 1;
-        if (set_blocksize(btp->pbr_bdev, sectorsize)) {
+        if (set_blocksize(btp->bt_bdev, sectorsize)) {
                printk(KERN_WARNING
                        "XFS: Cannot set_blocksize to %u on device %s\n",
                        sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
 }
 /*
-* When allocating the initial buffer target we have not yet
+ *      When allocating the initial buffer target we have not yet
-* read in the superblock, so don't know what sized sectors
+ *      read in the superblock, so don't know what sized sectors
-* are being used is at this early stage.  Play safe.
+ *      are being used at this early stage.  Play safe.
-*/
+ */
 STATIC int
 xfs_setsize_buftarg_early(
        xfs_buftarg_t           *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
        mapping->a_ops = &mapping_aops;
        mapping->backing_dev_info = bdi;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
-        btp->pbr_mapping = mapping;
+        btp->bt_mapping = mapping;
        return 0;
 }
+STATIC int
+xfs_alloc_delwrite_queue(
+        xfs_buftarg_t           *btp)
+{
+        int     error = 0;
+        INIT_LIST_HEAD(&btp->bt_list);
+        INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+        spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+        btp->bt_flags = 0;
+        btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+        if (IS_ERR(btp->bt_task)) {
+                error = PTR_ERR(btp->bt_task);
+                goto out_error;
+        }
+        xfs_register_buftarg(btp);
+out_error:
+        return error;
+}
 xfs_buftarg_t *
 xfs_alloc_buftarg(
        struct block_device     *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-        btp->pbr_dev =  bdev->bd_dev;
+        btp->bt_dev =  bdev->bd_dev;
-        btp->pbr_bdev = bdev;
+        btp->bt_bdev = bdev;
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
        if (xfs_mapping_buftarg(btp, bdev))
                goto error;
+        if (xfs_alloc_delwrite_queue(btp))
+                goto error;
        xfs_alloc_bufhash(btp, external);
        return btp;
@@ -1616,83 +1584,81 @@ error:
 /*
- * Pagebuf delayed write buffer handling
+ *      Delayed write buffer handling
 */
-STATIC LIST_HEAD(pbd_delwrite_queue);
-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
 STATIC void
-pagebuf_delwri_queue(
+xfs_buf_delwri_queue(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     unlock)
 {
-        PB_TRACE(pb, "delwri_q", (long)unlock);
+        struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
-        ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
+        spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-                                        (PBF_DELWRI|PBF_ASYNC));
+        XB_TRACE(bp, "delwri_q", (long)unlock);
+        ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
        /* If already in the queue, dequeue and place at tail */
-        if (!list_empty(&pb->pb_list)) {
+        if (!list_empty(&bp->b_list)) {
-                ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
+                ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-                if (unlock) {
+                if (unlock)
-                        atomic_dec(&pb->pb_hold);
+                        atomic_dec(&bp->b_hold);
-                }
+                list_del(&bp->b_list);
-                list_del(&pb->pb_list);
        }
-        pb->pb_flags |= _PBF_DELWRI_Q;
+        bp->b_flags |= _XBF_DELWRI_Q;
-        list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
+        list_add_tail(&bp->b_list, dwq);
-        pb->pb_queuetime = jiffies;
+        bp->b_queuetime = jiffies;
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
        if (unlock)
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);
 }
 void
-pagebuf_delwri_dequeue(
+xfs_buf_delwri_dequeue(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
+        spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
        int                     dequeued = 0;
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
-        if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+        if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-                ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
+                ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-                list_del_init(&pb->pb_list);
+                list_del_init(&bp->b_list);
                dequeued = 1;
        }
-        pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+        bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
        if (dequeued)
-                pagebuf_rele(pb);
+                xfs_buf_rele(bp);
-        PB_TRACE(pb, "delwri_dq", (long)dequeued);
+        XB_TRACE(bp, "delwri_dq", (long)dequeued);
 }
 STATIC void
-pagebuf_runall_queues(
+xfs_buf_runall_queues(
        struct workqueue_struct *queue)
 {
        flush_workqueue(queue);
 }
-/* Defines for pagebuf daemon */
-STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_force_flush;
-STATIC int xfsbufd_force_sleep;
 STATIC int
 xfsbufd_wakeup(
        int                     priority,
        gfp_t                   mask)
 {
-        if (xfsbufd_force_sleep)
+        xfs_buftarg_t           *btp;
-                return 0;
-        xfsbufd_force_flush = 1;
+        spin_lock(&xfs_buftarg_lock);
-        barrier();
+        list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
-        wake_up_process(xfsbufd_task);
+                if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
+                        continue;
+                set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
+                wake_up_process(btp->bt_task);
+        }
+        spin_unlock(&xfs_buftarg_lock);
        return 0;
 }
@@ -1702,67 +1668,70 @@ xfsbufd(
 {
        struct list_head        tmp;
        unsigned long           age;
-        xfs_buftarg_t           *target;
+        xfs_buftarg_t           *target = (xfs_buftarg_t *)data;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
+        struct list_head        *dwq = &target->bt_delwrite_queue;
+        spinlock_t              *dwlk = &target->bt_delwrite_lock;
        current->flags |= PF_MEMALLOC;
        INIT_LIST_HEAD(&tmp);
        do {
                if (unlikely(freezing(current))) {
-                        xfsbufd_force_sleep = 1;
+                        set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
                        refrigerator();
                } else {
-                        xfsbufd_force_sleep = 0;
+                        clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
                }
                schedule_timeout_interruptible(
                        xfs_buf_timer_centisecs * msecs_to_jiffies(10));
                age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-                spin_lock(&pbd_delwrite_lock);
+                spin_lock(dwlk);
-                list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+                list_for_each_entry_safe(bp, n, dwq, b_list) {
-                        PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
+                        XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
-                        ASSERT(pb->pb_flags & PBF_DELWRI);
+                        ASSERT(bp->b_flags & XBF_DELWRI);
-                        if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
+                        if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
-                                if (!xfsbufd_force_flush &&
+                                if (!test_bit(XBT_FORCE_FLUSH,
+                                                &target->bt_flags) &&
                                    time_before(jiffies,
-                                                pb->pb_queuetime + age)) {
+                                                bp->b_queuetime + age)) {
-                                        pagebuf_unlock(pb);
+                                        xfs_buf_unlock(bp);
                                        break;
                                }
-                                pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+                                bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-                                pb->pb_flags |= PBF_WRITE;
+                                bp->b_flags |= XBF_WRITE;
-                                list_move(&pb->pb_list, &tmp);
+                                list_move(&bp->b_list, &tmp);
                        }
                }
-                spin_unlock(&pbd_delwrite_lock);
+                spin_unlock(dwlk);
                while (!list_empty(&tmp)) {
-                        pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+                        bp = list_entry(tmp.next, xfs_buf_t, b_list);
-                        target = pb->pb_target;
+                        ASSERT(target == bp->b_target);
-                        list_del_init(&pb->pb_list);
+                        list_del_init(&bp->b_list);
-                        pagebuf_iostrategy(pb);
+                        xfs_buf_iostrategy(bp);
-                        blk_run_address_space(target->pbr_mapping);
+                        blk_run_address_space(target->bt_mapping);
                }
                if (as_list_len > 0)
                        purge_addresses();
-                xfsbufd_force_flush = 0;
+                clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
        } while (!kthread_should_stop());
        return 0;
 }
 /*
- * Go through all incore buffers, and release buffers if they belong to
+ *      Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
+ *      the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
+ *      preserve the consistency of its metadata.
 */
 int
 xfs_flush_buftarg(
@@ -1770,73 +1739,77 @@ xfs_flush_buftarg(
        int                     wait)
 {
        struct list_head        tmp;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
        int                     pincount = 0;
+        struct list_head        *dwq = &target->bt_delwrite_queue;
+        spinlock_t              *dwlk = &target->bt_delwrite_lock;
-        pagebuf_runall_queues(xfsdatad_workqueue);
+        xfs_buf_runall_queues(xfsdatad_workqueue);
-        pagebuf_runall_queues(xfslogd_workqueue);
+        xfs_buf_runall_queues(xfslogd_workqueue);
        INIT_LIST_HEAD(&tmp);
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
-        list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+        list_for_each_entry_safe(bp, n, dwq, b_list) {
+                ASSERT(bp->b_target == target);
-                if (pb->pb_target != target)
+                ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
-                        continue;
+                XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
+                if (xfs_buf_ispin(bp)) {
-                ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
-                PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
-                if (pagebuf_ispin(pb)) {
                        pincount++;
                        continue;
                }
-                list_move(&pb->pb_list, &tmp);
+                list_move(&bp->b_list, &tmp);
        }
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
        /*
         * Dropped the delayed write list lock, now walk the temporary list
         */
-        list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+        list_for_each_entry_safe(bp, n, &tmp, b_list) {
-                pagebuf_lock(pb);
+                xfs_buf_lock(bp);
-                pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+                bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-                pb->pb_flags |= PBF_WRITE;
+                bp->b_flags |= XBF_WRITE;
                if (wait)
-                        pb->pb_flags &= ~PBF_ASYNC;
+                        bp->b_flags &= ~XBF_ASYNC;
                else
-                        list_del_init(&pb->pb_list);
+                        list_del_init(&bp->b_list);
-                pagebuf_iostrategy(pb);
+                xfs_buf_iostrategy(bp);
        }
+        /*
+         * Unplug the device queue once, after every buffer has been handed
+         * to xfs_buf_iostrategy() and before we start waiting on them;
+         * unplugging only after the waits have completed achieves nothing.
+         */
+        if (wait)
+                blk_run_address_space(target->bt_mapping);
        /*
         * Remaining list items must be flushed before returning
         */
        while (!list_empty(&tmp)) {
-                pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+                bp = list_entry(tmp.next, xfs_buf_t, b_list);
-                list_del_init(&pb->pb_list);
+                list_del_init(&bp->b_list);
-                xfs_iowait(pb);
+                xfs_iowait(bp);
-                xfs_buf_relse(pb);
+                xfs_buf_relse(bp);
        }
-        if (wait)
-                blk_run_address_space(target->pbr_mapping);
        return pincount;
 }
 int __init
-pagebuf_init(void)
+xfs_buf_init(void)
 {
        int             error = -ENOMEM;
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+        xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
 #endif
-        pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+        xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
-        if (!pagebuf_zone)
+        if (!xfs_buf_zone)
                goto out_free_trace_buf;
        xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
        if (!xfsdatad_workqueue)
                goto out_destroy_xfslogd_workqueue;
-        xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
+        xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
-        if (IS_ERR(xfsbufd_task)) {
+        if (!xfs_buf_shake)
-                error = PTR_ERR(xfsbufd_task);
                goto out_destroy_xfsdatad_workqueue;
-        }
-        pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
-        if (!pagebuf_shake)
-                goto out_stop_xfsbufd;
        return 0;
- out_stop_xfsbufd:
-        kthread_stop(xfsbufd_task);
 out_destroy_xfsdatad_workqueue:
        destroy_workqueue(xfsdatad_workqueue);
 out_destroy_xfslogd_workqueue:
        destroy_workqueue(xfslogd_workqueue);
 out_free_buf_zone:
-        kmem_zone_destroy(pagebuf_zone);
+        kmem_zone_destroy(xfs_buf_zone);
 out_free_trace_buf:
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        ktrace_free(pagebuf_trace_buf);
+        ktrace_free(xfs_buf_trace_buf);
 #endif
        return error;
 }
 void
-pagebuf_terminate(void)
+xfs_buf_terminate(void)
 {
-        kmem_shake_deregister(pagebuf_shake);
+        kmem_shake_deregister(xfs_buf_shake);
-        kthread_stop(xfsbufd_task);
        destroy_workqueue(xfsdatad_workqueue);
        destroy_workqueue(xfslogd_workqueue);
-        kmem_zone_destroy(pagebuf_zone);
+        kmem_zone_destroy(xfs_buf_zone);
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        ktrace_free(pagebuf_trace_buf);
+        ktrace_free(xfs_buf_trace_buf);
 #endif
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d1..4dd6592d5a4c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -32,44 +32,47 @@
 *      Base types
 */
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
+#define XFS_BUF_DADDR_NULL      ((xfs_daddr_t) (-1LL))
-#define page_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_ctob(pp)        ((pp) * PAGE_CACHE_SIZE)
-#define page_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoc(dd)        (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define page_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)       ((dd) >> PAGE_CACHE_SHIFT)
-#define page_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
+#define xfs_buf_poff(aa)        ((aa) & ~PAGE_CACHE_MASK)
-typedef enum page_buf_rw_e {
+typedef enum {
-        PBRW_READ = 1,                  /* transfer into target memory */
+        XBRW_READ = 1,                  /* transfer into target memory */
-        PBRW_WRITE = 2,                 /* transfer from target memory */
+        XBRW_WRITE = 2,                 /* transfer from target memory */
-        PBRW_ZERO = 3                   /* Zero target memory */
+        XBRW_ZERO = 3,                  /* Zero target memory */
-} page_buf_rw_t;
+} xfs_buf_rw_t;
+typedef enum {
-typedef enum page_buf_flags_e {         /* pb_flags values */
+        XBF_READ = (1 << 0),    /* buffer intended for reading from device */
-        PBF_READ = (1 << 0),    /* buffer intended for reading from device */
+        XBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
-        PBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
+        XBF_MAPPED = (1 << 2),  /* buffer mapped (b_addr valid)            */
-        PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
+        XBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
-        PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
+        XBF_DONE = (1 << 5),    /* all pages in the buffer uptodate        */
-        PBF_DONE = (1 << 5),    /* all pages in the buffer uptodate        */
+        XBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
-        PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
+        XBF_STALE = (1 << 7),   /* buffer has been staled, do not find it  */
-        PBF_STALE = (1 << 7),   /* buffer has been staled, do not find it  */
+        XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
-        PBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
+        XBF_ORDERED = (1 << 11),    /* use ordered writes                  */
-        PBF_ORDERED = (1 << 11),    /* use ordered writes                  */
+        XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
-        PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
        /* flags used only as arguments to access routines */
-        PBF_LOCK = (1 << 14),       /* lock requested                      */
+        XBF_LOCK = (1 << 14),       /* lock requested                      */
-        PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
+        XBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
-        PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
+        XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
        /* flags used only internally */
-        _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
+        _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
-        _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
+        _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
-        _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
+        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
-        _PBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
+        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
-} page_buf_flags_t;
+} xfs_buf_flags_t;
+typedef enum {
+        XBT_FORCE_SLEEP = 0,    /* bit number in bt_flags (bitops), not a mask */
+        XBT_FORCE_FLUSH = 1,    /* bit number in bt_flags (bitops), not a mask */
+} xfs_buftarg_flags_t;
 typedef struct xfs_bufhash {
        struct list_head        bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
 } xfs_bufhash_t;
 typedef struct xfs_buftarg {
-        dev_t                   pbr_dev;
+        dev_t                   bt_dev;
-        struct block_device     *pbr_bdev;
+        struct block_device     *bt_bdev;
-        struct address_space    *pbr_mapping;
+        struct address_space    *bt_mapping;
-        unsigned int            pbr_bsize;
+        unsigned int            bt_bsize;
-        unsigned int            pbr_sshift;
+        unsigned int            bt_sshift;
-        size_t                  pbr_smask;
+        size_t                  bt_smask;
-        /* per-device buffer hash table */
+        /* per device buffer hash table */
        uint                    bt_hashmask;
        uint                    bt_hashshift;
        xfs_bufhash_t           *bt_hash;
+        /* per device delwri queue */
+        struct task_struct      *bt_task;
+        struct list_head        bt_list;
+        struct list_head        bt_delwrite_queue;
+        spinlock_t              bt_delwrite_lock;
+        unsigned long           bt_flags;
 } xfs_buftarg_t;
 /*
- *      xfs_buf_t:  Buffer structure for page cache-based buffers
+ *      xfs_buf_t:  Buffer structure for pagecache-based buffers
+ *
+ * This buffer structure is used by the pagecache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer.
 *
- * This buffer structure is used by the page cache buffer management routines
+ * The buffer structure is used on a temporary basis only, and discarded when
- * to refer to an assembly of pages forming a logical buffer.  The actual I/O
+ * released.  The real data storage is recorded in the pagecache. Buffers are
- * is performed with buffer_head structures, as required by drivers.
- * 
- * The buffer structure is used on temporary basis only, and discarded when
- * released.  The real data storage is recorded in the page cache.  Metadata is
 * hashed to the block device on which the file system resides.
 */
 struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
+#define XB_PAGES        2
-typedef void (*page_buf_iodone_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
-typedef void (*page_buf_relse_t)(struct xfs_buf *);
-/* pre-write function */
-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
-#define PB_PAGES        2
 typedef struct xfs_buf {
-        struct semaphore        pb_sema;        /* semaphore for lockables  */
+        struct semaphore        b_sema;         /* semaphore for lockables */
-        unsigned long           pb_queuetime;   /* time buffer was queued   */
+        unsigned long           b_queuetime;    /* time buffer was queued */
-        atomic_t                pb_pin_count;   /* pin count                */
+        atomic_t                b_pin_count;    /* pin count */
-        wait_queue_head_t       pb_waiters;     /* unpin waiters            */
+        wait_queue_head_t       b_waiters;      /* unpin waiters */
-        struct list_head        pb_list;
+        struct list_head        b_list;
-        page_buf_flags_t        pb_flags;       /* status flags */
+        xfs_buf_flags_t         b_flags;        /* status flags */
-        struct list_head        pb_hash_list;   /* hash table list */
+        struct list_head        b_hash_list;    /* hash table list */
-        xfs_bufhash_t           *pb_hash;       /* hash table list start */
+        xfs_bufhash_t           *b_hash;        /* hash table list start */
-        xfs_buftarg_t           *pb_target;     /* buffer target (device) */
+        xfs_buftarg_t           *b_target;      /* buffer target (device) */
-        atomic_t                pb_hold;        /* reference count */
+        atomic_t                b_hold;         /* reference count */
-        xfs_daddr_t             pb_bn;          /* block number for I/O */
+        xfs_daddr_t             b_bn;           /* block number for I/O */
-        loff_t                  pb_file_offset; /* offset in file */
+        xfs_off_t               b_file_offset;  /* offset in file */
-        size_t                  pb_buffer_length; /* size of buffer in bytes */
+        size_t                  b_buffer_length;/* size of buffer in bytes */
-        size_t                  pb_count_desired; /* desired transfer size */
+        size_t                  b_count_desired;/* desired transfer size */
-        void                    *pb_addr;       /* virtual address of buffer */
+        void                    *b_addr;        /* virtual address of buffer */
-        struct work_struct      pb_iodone_work;
+        struct work_struct      b_iodone_work;
-        atomic_t                pb_io_remaining;/* #outstanding I/O requests */
+        atomic_t                b_io_remaining; /* #outstanding I/O requests */
-        page_buf_iodone_t       pb_iodone;      /* I/O completion function */
+        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
-        page_buf_relse_t        pb_relse;       /* releasing function */
+        xfs_buf_relse_t         b_relse;        /* releasing function */
-        page_buf_bdstrat_t      pb_strat;       /* pre-write function */
+        xfs_buf_bdstrat_t       b_strat;        /* pre-write function */
-        struct semaphore        pb_iodonesema;  /* Semaphore for I/O waiters */
+        struct semaphore        b_iodonesema;   /* Semaphore for I/O waiters */
-        void                    *pb_fspriv;
+        void                    *b_fspriv;
-        void                    *pb_fspriv2;
+        void                    *b_fspriv2;
-        void                    *pb_fspriv3;
+        void                    *b_fspriv3;
-        unsigned short          pb_error;       /* error code on I/O */
+        unsigned short          b_error;        /* error code on I/O */
-        unsigned short          pb_locked;      /* page array is locked */
+        unsigned short          b_locked;       /* page array is locked */
-        unsigned int            pb_page_count;  /* size of page array */
+        unsigned int            b_page_count;   /* size of page array */
-        unsigned int            pb_offset;      /* page offset in first page */
+        unsigned int            b_offset;       /* page offset in first page */
-        struct page             **pb_pages;     /* array of page pointers */
+        struct page             **b_pages;      /* array of page pointers */
-        struct page             *pb_page_array[PB_PAGES]; /* inline pages */
+        struct page             *b_page_array[XB_PAGES]; /* inline pages */
-#ifdef PAGEBUF_LOCK_TRACKING
+#ifdef XFS_BUF_LOCK_TRACKING
-        int                     pb_last_holder;
+        int                     b_last_holder;
 #endif
 } xfs_buf_t;
 /* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-extern xfs_buf_t *_pagebuf_find(        /* find buffer for block if     */
+                                xfs_buf_flags_t, xfs_buf_t *);
-                                        /* the block is in memory       */
-                xfs_buftarg_t *,        /* inode for block              */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t,       /* PBF_LOCK                     */
-                xfs_buf_t *);           /* newly allocated buffer       */
 #define xfs_incore(buftarg,blkno,len,lockit) \
-        _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
+        _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-extern xfs_buf_t *xfs_buf_get_flags(    /* allocate a buffer            */
-                xfs_buftarg_t *,        /* inode for buffer             */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_READ,          */
-                                        /* PBF_ASYNC                    */
+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 #define xfs_buf_get(target, blkno, len, flags) \
-        xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+        xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-extern xfs_buf_t *xfs_buf_read_flags(   /* allocate and read a buffer   */
-                xfs_buftarg_t *,        /* inode for buffer             */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_ASYNC          */
+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 #define xfs_buf_read(target, blkno, len, flags) \
-        xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+        xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-extern xfs_buf_t *pagebuf_get_empty(    /* allocate pagebuf struct with */
-                                        /*  no memory or disk address   */
-                size_t len,
-                xfs_buftarg_t *);       /* mount point "fake" inode     */
-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct       */
-                                        /* without disk address         */
-                size_t len,
-                xfs_buftarg_t *);       /* mount point "fake" inode     */
-extern int pagebuf_associate_memory(
-                xfs_buf_t *,
-                void *,
-                size_t);
-extern void pagebuf_hold(               /* increment reference count    */
-                xfs_buf_t *);           /* buffer to hold               */
-extern void pagebuf_readahead(          /* read ahead into cache        */
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-                xfs_buftarg_t  *,       /* target for buffer (or NULL)  */
+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
-                loff_t,                 /* starting offset of range     */
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-                size_t,                 /* length of range              */
+extern void xfs_buf_hold(xfs_buf_t *);
-                page_buf_flags_t);      /* additional read flags        */
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 /* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
-extern void pagebuf_free(               /* deallocate a buffer          */
+extern void xfs_buf_rele(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to deallocate         */
-extern void pagebuf_rele(               /* release hold on a buffer     */
-                xfs_buf_t *);           /* buffer to release            */
 /* Locking and Unlocking Buffers */
+extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int pagebuf_cond_lock(           /* lock buffer, if not locked   */
+extern int xfs_buf_lock_value(xfs_buf_t *);
-                                        /* (returns -EBUSY if locked)   */
+extern void xfs_buf_lock(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to lock               */
+extern void xfs_buf_unlock(xfs_buf_t *);
-extern int pagebuf_lock_value(          /* return count on lock         */
-                xfs_buf_t *);          /* buffer to check              */
-extern int pagebuf_lock(                /* lock buffer                  */
-                xfs_buf_t *);          /* buffer to lock               */
-extern void pagebuf_unlock(             /* unlock buffer                */
-                xfs_buf_t *);           /* buffer to unlock             */
 /* Buffer Read and Write Routines */
+extern void xfs_buf_ioend(xfs_buf_t *,  int);
-extern void pagebuf_iodone(             /* mark buffer I/O complete     */
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
-                xfs_buf_t *,            /* buffer to mark               */
+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
-                int);                   /* run completion locally, or in
+extern int xfs_buf_iorequest(xfs_buf_t *);
-                                         * a helper thread.             */
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
-extern void pagebuf_ioerror(            /* mark buffer in error (or not) */
+                                xfs_buf_rw_t);
-                xfs_buf_t *,            /* buffer to mark               */
-                int);                   /* error to store (0 if none)   */
+static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
-extern int pagebuf_iostart(             /* start I/O on a buffer        */
-                xfs_buf_t *,            /* buffer to start              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_ASYNC,         */
-                                        /* PBF_READ, PBF_WRITE,         */
-                                        /* PBF_DELWRI                   */
-extern int pagebuf_iorequest(           /* start real I/O               */
-                xfs_buf_t *);           /* buffer to convey to device   */
-extern int pagebuf_iowait(              /* wait for buffer I/O done     */
-                xfs_buf_t *);           /* buffer to wait on            */
-extern void pagebuf_iomove(             /* move data in/out of pagebuf  */
-                xfs_buf_t *,            /* buffer to manipulate         */
-                size_t,                 /* starting buffer offset       */
-                size_t,                 /* length in buffer             */
-                caddr_t,                /* data pointer                 */
-                page_buf_rw_t);         /* direction                    */
-static inline int pagebuf_iostrategy(xfs_buf_t *pb)
 {
-        return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+        return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
 }
-static inline int pagebuf_geterror(xfs_buf_t *pb)
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
 {
-        return pb ? pb->pb_error : ENOMEM;
+        return bp ? bp->b_error : ENOMEM;
 }
 /* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-extern caddr_t pagebuf_offset(          /* pointer at offset in buffer  */
-                xfs_buf_t *,            /* buffer to offset into        */
-                size_t);                /* offset                       */
 /* Pinning Buffer Storage in Memory */
+extern void xfs_buf_pin(xfs_buf_t *);
-extern void pagebuf_pin(                /* pin buffer in memory         */
+extern void xfs_buf_unpin(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to pin                */
+extern int xfs_buf_ispin(xfs_buf_t *);
-extern void pagebuf_unpin(              /* unpin buffered data          */
-                xfs_buf_t *);           /* buffer to unpin              */
-extern int pagebuf_ispin(               /* check if buffer is pinned    */
-                xfs_buf_t *);           /* buffer to check              */
 /* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void pagebuf_delwri_dequeue(xfs_buf_t *);
 /* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
-extern int pagebuf_init(void);
+#ifdef XFS_BUF_TRACE
-extern void pagebuf_terminate(void);
+extern ktrace_t *xfs_buf_trace_buf;
+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
-#ifdef PAGEBUF_TRACE
-extern ktrace_t *pagebuf_trace_buf;
-extern void pagebuf_trace(
-                xfs_buf_t *,            /* buffer being traced          */
-                char *,                 /* description of operation     */
-                void *,                 /* arbitrary diagnostic value   */
-                void *);                /* return address               */
 #else
-# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
+#define xfs_buf_trace(bp,id,ptr,ra)     do { } while (0)
 #endif
-#define pagebuf_target_name(target)     \
+#define xfs_buf_target_name(target)     \
-        ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+        ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
+#define XFS_B_ASYNC             XBF_ASYNC
+#define XFS_B_DELWRI            XBF_DELWRI
+#define XFS_B_READ              XBF_READ
+#define XFS_B_WRITE             XBF_WRITE
+#define XFS_B_STALE             XBF_STALE
-/* These are just for xfs_syncsub... it sets an internal variable
+#define XFS_BUF_TRYLOCK         XBF_TRYLOCK
- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
+#define XFS_INCORE_TRYLOCK      XBF_TRYLOCK
- */
+#define XFS_BUF_LOCK            XBF_LOCK
-#define XFS_B_ASYNC             PBF_ASYNC
+#define XFS_BUF_MAPPED          XBF_MAPPED
-#define XFS_B_DELWRI            PBF_DELWRI
-#define XFS_B_READ              PBF_READ
-#define XFS_B_WRITE             PBF_WRITE
-#define XFS_B_STALE             PBF_STALE
-#define XFS_BUF_TRYLOCK         PBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK      PBF_TRYLOCK
-#define XFS_BUF_LOCK            PBF_LOCK
-#define XFS_BUF_MAPPED          PBF_MAPPED
-#define BUF_BUSY                PBF_DONT_BLOCK
-#define XFS_BUF_BFLAGS(x)       ((x)->pb_flags)
-#define XFS_BUF_ZEROFLAGS(x)    \
-        ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
-#define XFS_BUF_STALE(x)        ((x)->pb_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(x)      ((x)->pb_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(x)      ((x)->pb_flags & XFS_B_STALE)
-#define XFS_BUF_SUPER_STALE(x)  do {                            \
-                                        XFS_BUF_STALE(x);       \
-                                        pagebuf_delwri_dequeue(x);      \
-                                        XFS_BUF_DONE(x);        \
-                                } while (0)
-#define XFS_BUF_MANAGE          PBF_FS_MANAGED
+#define BUF_BUSY                XBF_DONT_BLOCK
-#define XFS_BUF_UNMANAGE(x)     ((x)->pb_flags &= ~PBF_FS_MANAGED)
+#define XFS_BUF_BFLAGS(bp)      ((bp)->b_flags)
-#define XFS_BUF_DELAYWRITE(x)    ((x)->pb_flags |= PBF_DELWRI)
+#define XFS_BUF_ZEROFLAGS(bp)   \
-#define XFS_BUF_UNDELAYWRITE(x)  pagebuf_delwri_dequeue(x)
+        ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
-#define XFS_BUF_ISDELAYWRITE(x)  ((x)->pb_flags & PBF_DELWRI)
+#define XFS_BUF_STALE(bp)       ((bp)->b_flags |= XFS_B_STALE)
-#define XFS_BUF_ERROR(x,no)      pagebuf_ioerror(x,no)
+#define XFS_BUF_UNSTALE(bp)     ((bp)->b_flags &= ~XFS_B_STALE)
-#define XFS_BUF_GETERROR(x)      pagebuf_geterror(x)
+#define XFS_BUF_ISSTALE(bp)     ((bp)->b_flags & XFS_B_STALE)
-#define XFS_BUF_ISERROR(x)       (pagebuf_geterror(x)?1:0)
+#define XFS_BUF_SUPER_STALE(bp) do {                            \
+                                        XFS_BUF_STALE(bp);      \
-#define XFS_BUF_DONE(x)          ((x)->pb_flags |= PBF_DONE)
+                                        xfs_buf_delwri_dequeue(bp);     \
-#define XFS_BUF_UNDONE(x)        ((x)->pb_flags &= ~PBF_DONE)
+                                        XFS_BUF_DONE(bp);       \
-#define XFS_BUF_ISDONE(x)        ((x)->pb_flags & PBF_DONE)
+                                } while (0)
-#define XFS_BUF_BUSY(x)          do { } while (0)
-#define XFS_BUF_UNBUSY(x)        do { } while (0)
-#define XFS_BUF_ISBUSY(x)        (1)
-#define XFS_BUF_ASYNC(x)         ((x)->pb_flags |= PBF_ASYNC)
-#define XFS_BUF_UNASYNC(x)       ((x)->pb_flags &= ~PBF_ASYNC)
-#define XFS_BUF_ISASYNC(x)       ((x)->pb_flags & PBF_ASYNC)
-#define XFS_BUF_ORDERED(x)       ((x)->pb_flags |= PBF_ORDERED)
-#define XFS_BUF_UNORDERED(x)     ((x)->pb_flags &= ~PBF_ORDERED)
-#define XFS_BUF_ISORDERED(x)     ((x)->pb_flags & PBF_ORDERED)
-#define XFS_BUF_SHUT(x)          printk("XFS_BUF_SHUT not implemented yet\n")
-#define XFS_BUF_UNSHUT(x)        printk("XFS_BUF_UNSHUT not implemented yet\n")
-#define XFS_BUF_ISSHUT(x)        (0)
-#define XFS_BUF_HOLD(x)         pagebuf_hold(x)
-#define XFS_BUF_READ(x)         ((x)->pb_flags |= PBF_READ)
-#define XFS_BUF_UNREAD(x)       ((x)->pb_flags &= ~PBF_READ)
-#define XFS_BUF_ISREAD(x)       ((x)->pb_flags & PBF_READ)
-#define XFS_BUF_WRITE(x)        ((x)->pb_flags |= PBF_WRITE)
-#define XFS_BUF_UNWRITE(x)      ((x)->pb_flags &= ~PBF_WRITE)
-#define XFS_BUF_ISWRITE(x)      ((x)->pb_flags & PBF_WRITE)
-#define XFS_BUF_ISUNINITIAL(x)   (0)
-#define XFS_BUF_UNUNINITIAL(x)   (0)
-#define XFS_BUF_BP_ISMAPPED(bp)  1
-#define XFS_BUF_IODONE_FUNC(buf)        (buf)->pb_iodone
-#define XFS_BUF_SET_IODONE_FUNC(buf, func)      \
-                        (buf)->pb_iodone = (func)
-#define XFS_BUF_CLR_IODONE_FUNC(buf)            \
-                        (buf)->pb_iodone = NULL
-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func)     \
-                        (buf)->pb_strat = (func)
-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf)           \
-                        (buf)->pb_strat = NULL
-#define XFS_BUF_FSPRIVATE(buf, type)            \
-                        ((type)(buf)->pb_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(buf, value)       \
-                        (buf)->pb_fspriv = (void *)(value)
-#define XFS_BUF_FSPRIVATE2(buf, type)           \
-                        ((type)(buf)->pb_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(buf, value)      \
-                        (buf)->pb_fspriv2 = (void *)(value)
-#define XFS_BUF_FSPRIVATE3(buf, type)           \
-                        ((type)(buf)->pb_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(buf, value)      \
-                        (buf)->pb_fspriv3  = (void *)(value)
-#define XFS_BUF_SET_START(buf)
-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
-                        (buf)->pb_relse = (value)
-#define XFS_BUF_PTR(bp)         (xfs_caddr_t)((bp)->pb_addr)
-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
-{
-        if (bp->pb_flags & PBF_MAPPED)
-                return XFS_BUF_PTR(bp) + offset;
-        return (xfs_caddr_t) pagebuf_offset(bp, offset);
-}
-#define XFS_BUF_SET_PTR(bp, val, count)         \
+#define XFS_BUF_MANAGE          XBF_FS_MANAGED
-                                pagebuf_associate_memory(bp, val, count)
+#define XFS_BUF_UNMANAGE(bp)    ((bp)->b_flags &= ~XBF_FS_MANAGED)
-#define XFS_BUF_ADDR(bp)        ((bp)->pb_bn)
-#define XFS_BUF_SET_ADDR(bp, blk)               \
+#define XFS_BUF_DELAYWRITE(bp)          ((bp)->b_flags |= XBF_DELWRI)
-                        ((bp)->pb_bn = (xfs_daddr_t)(blk))
+#define XFS_BUF_UNDELAYWRITE(bp)        xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_OFFSET(bp)      ((bp)->pb_file_offset)
+#define XFS_BUF_ISDELAYWRITE(bp)        ((bp)->b_flags & XBF_DELWRI)
-#define XFS_BUF_SET_OFFSET(bp, off)             \
-                        ((bp)->pb_file_offset = (off))
+#define XFS_BUF_ERROR(bp,no)    xfs_buf_ioerror(bp,no)
-#define XFS_BUF_COUNT(bp)       ((bp)->pb_count_desired)
+#define XFS_BUF_GETERROR(bp)    xfs_buf_geterror(bp)
-#define XFS_BUF_SET_COUNT(bp, cnt)              \
+#define XFS_BUF_ISERROR(bp)     (xfs_buf_geterror(bp) ? 1 : 0)
-                        ((bp)->pb_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)        ((bp)->pb_buffer_length)
+#define XFS_BUF_DONE(bp)        ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_SET_SIZE(bp, cnt)               \
+#define XFS_BUF_UNDONE(bp)      ((bp)->b_flags &= ~XBF_DONE)
-                        ((bp)->pb_buffer_length = (cnt))
+#define XFS_BUF_ISDONE(bp)      ((bp)->b_flags & XBF_DONE)
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_BUSY(bp)        do { } while (0)
-#define XFS_BUF_SET_REF(bp, ref)
+#define XFS_BUF_UNBUSY(bp)      do { } while (0)
+#define XFS_BUF_ISBUSY(bp)      (1)
-#define XFS_BUF_ISPINNED(bp)    pagebuf_ispin(bp)
+#define XFS_BUF_ASYNC(bp)       ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_VALUSEMA(bp)    pagebuf_lock_value(bp)
+#define XFS_BUF_UNASYNC(bp)     ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_CPSEMA(bp)      (pagebuf_cond_lock(bp) == 0)
+#define XFS_BUF_ISASYNC(bp)     ((bp)->b_flags & XBF_ASYNC)
-#define XFS_BUF_VSEMA(bp)       pagebuf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x)     pagebuf_lock(bp)
+#define XFS_BUF_ORDERED(bp)     ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
+#define XFS_BUF_UNORDERED(bp)   ((bp)->b_flags &= ~XBF_ORDERED)
+#define XFS_BUF_ISORDERED(bp)   ((bp)->b_flags & XBF_ORDERED)
-/* setup the buffer target from a buftarg structure */
-#define XFS_BUF_SET_TARGET(bp, target)  \
+#define XFS_BUF_SHUT(bp)        do { } while (0)
-                (bp)->pb_target = (target)
+#define XFS_BUF_UNSHUT(bp)      do { } while (0)
-#define XFS_BUF_TARGET(bp)      ((bp)->pb_target)
+#define XFS_BUF_ISSHUT(bp)      (0)
-#define XFS_BUFTARG_NAME(target)        \
-                pagebuf_target_name(target)
+#define XFS_BUF_HOLD(bp)        xfs_buf_hold(bp)
+#define XFS_BUF_READ(bp)        ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_UNREAD(bp)      ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_ISREAD(bp)      ((bp)->b_flags & XBF_READ)
-#define XFS_BUF_SET_REF(bp, ref)
+#define XFS_BUF_WRITE(bp)       ((bp)->b_flags |= XBF_WRITE)
-static inline int       xfs_bawrite(void *mp, xfs_buf_t *bp)
+#define XFS_BUF_UNWRITE(bp)     ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)     ((bp)->b_flags & XBF_WRITE)
+#define XFS_BUF_ISUNINITIAL(bp) (0)
+#define XFS_BUF_UNUNINITIAL(bp) (0)
+#define XFS_BUF_BP_ISMAPPED(bp) (1)
+#define XFS_BUF_IODONE_FUNC(bp)                 ((bp)->b_iodone)
+#define XFS_BUF_SET_IODONE_FUNC(bp, func)       ((bp)->b_iodone = (func))
+#define XFS_BUF_CLR_IODONE_FUNC(bp)             ((bp)->b_iodone = NULL)
+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func)      ((bp)->b_strat = (func))
+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp)            ((bp)->b_strat = NULL)
+#define XFS_BUF_FSPRIVATE(bp, type)             ((type)(bp)->b_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(bp, val)          ((bp)->b_fspriv = (void*)(val))
+#define XFS_BUF_FSPRIVATE2(bp, type)            ((type)(bp)->b_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(bp, val)         ((bp)->b_fspriv2 = (void*)(val))
+#define XFS_BUF_FSPRIVATE3(bp, type)            ((type)(bp)->b_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(bp, val)         ((bp)->b_fspriv3 = (void*)(val))
+#define XFS_BUF_SET_START(bp)                   do { } while (0)
+#define XFS_BUF_SET_BRELSE_FUNC(bp, func)       ((bp)->b_relse = (func))
+#define XFS_BUF_PTR(bp)                 (xfs_caddr_t)((bp)->b_addr)
+#define XFS_BUF_SET_PTR(bp, val, cnt)   xfs_buf_associate_memory(bp, val, cnt)
+#define XFS_BUF_ADDR(bp)                ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)       ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)              ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)     ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)               ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)      ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)                ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)       ((bp)->b_buffer_length = (cnt))
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)    do { } while (0)
+#define XFS_BUF_SET_VTYPE(bp, type)             do { } while (0)
+#define XFS_BUF_SET_REF(bp, ref)                do { } while (0)
+#define XFS_BUF_ISPINNED(bp)    xfs_buf_ispin(bp)
+#define XFS_BUF_VALUSEMA(bp)    xfs_buf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp)      (xfs_buf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp)       xfs_buf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x)     xfs_buf_lock(bp)
+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
+#define XFS_BUF_SET_TARGET(bp, target)  ((bp)->b_target = (target))
+#define XFS_BUF_TARGET(bp)              ((bp)->b_target)
+#define XFS_BUFTARG_NAME(target)        xfs_buf_target_name(target)
+static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
 {
-        bp->pb_fspriv3 = mp;
+        bp->b_fspriv3 = mp;
-        bp->pb_strat = xfs_bdstrat_cb;
+        bp->b_strat = xfs_bdstrat_cb;
-        pagebuf_delwri_dequeue(bp);
+        xfs_buf_delwri_dequeue(bp);
-        return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+        return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
 }
-static inline void      xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(xfs_buf_t *bp)
 {
-        if (!bp->pb_relse)
+        if (!bp->b_relse)
-                pagebuf_unlock(bp);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(bp);
+        xfs_buf_rele(bp);
 }
-#define xfs_bpin(bp)            pagebuf_pin(bp)
+#define xfs_bpin(bp)            xfs_buf_pin(bp)
-#define xfs_bunpin(bp)          pagebuf_unpin(bp)
+#define xfs_bunpin(bp)          xfs_buf_unpin(bp)
 #define xfs_buftrace(id, bp)    \
-            pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+            xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
-#define xfs_biodone(pb)             \
+#define xfs_biodone(bp)         xfs_buf_ioend(bp, 0)
-            pagebuf_iodone(pb, 0)
-#define xfs_biomove(pb, off, len, data, rw) \
+#define xfs_biomove(bp, off, len, data, rw) \
-            pagebuf_iomove((pb), (off), (len), (data), \
+            xfs_buf_iomove((bp), (off), (len), (data), \
-                ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+                ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
-#define xfs_biozero(pb, off, len) \
+#define xfs_biozero(bp, off, len) \
-            pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-static inline int       XFS_bwrite(xfs_buf_t *pb)
+static inline int XFS_bwrite(xfs_buf_t *bp)
 {
-        int     iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+        int     iowait = (bp->b_flags & XBF_ASYNC) == 0;
        int     error = 0;
        if (!iowait)
-                pb->pb_flags |= _PBF_RUN_QUEUES;
+                bp->b_flags |= _XBF_RUN_QUEUES;
-        pagebuf_delwri_dequeue(pb);
+        xfs_buf_delwri_dequeue(bp);
-        pagebuf_iostrategy(pb);
+        xfs_buf_iostrategy(bp);
        if (iowait) {
-                error = pagebuf_iowait(pb);
+                error = xfs_buf_iowait(bp);
-                xfs_buf_relse(pb);
+                xfs_buf_relse(bp);
        }
        return error;
 }
-#define XFS_bdwrite(pb)              \
+#define XFS_bdwrite(bp)         xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
-            pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
 static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
 {
-        bp->pb_strat = xfs_bdstrat_cb;
+        bp->b_strat = xfs_bdstrat_cb;
-        bp->pb_fspriv3 = mp;
+        bp->b_fspriv3 = mp;
+        return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
-        return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
 }
-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
-#define xfs_iowait(pb)  pagebuf_iowait(pb)
+#define xfs_iowait(bp)  xfs_buf_iowait(bp)
 #define xfs_baread(target, rablkno, ralen)  \
-        pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
+        xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-#define xfs_buf_get_empty(len, target)  pagebuf_get_empty((len), (target))
-#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
-#define xfs_buf_free(bp)                pagebuf_free(bp)
 /*
 *      Handling of buftargs.
 */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
 extern void xfs_free_buftarg(xfs_buftarg_t *, int);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-#define xfs_getsize_buftarg(buftarg) \
+#define xfs_getsize_buftarg(buftarg)    block_size((buftarg)->bt_bdev)
-        block_size((buftarg)->pbr_bdev)
+#define xfs_readonly_buftarg(buftarg)   bdev_read_only((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg) \
-        bdev_read_only((buftarg)->pbr_bdev)
+#define xfs_binval(buftarg)             xfs_flush_buftarg(buftarg, 1)
-#define xfs_binval(buftarg) \
+#define XFS_bflush(buftarg)             xfs_flush_buftarg(buftarg, 1)
-        xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg) \
-        xfs_flush_buftarg(buftarg, 1)
 #endif  /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 06111d0bbae4..ced4404339c7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -509,16 +509,14 @@ linvfs_open_exec(
        vnode_t         *vp = LINVFS_GET_VP(inode);
        xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
        int             error = 0;
-        bhv_desc_t      *bdp;
        xfs_inode_t     *ip;
        if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-                bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+                ip = xfs_vtoi(vp);
-                if (!bdp) {
+                if (!ip) {
                        error = -EINVAL;
                        goto open_exec_out;
                }
-                ip = XFS_BHVTOI(bdp);
                if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
                        error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
                                               0, 0, 0, NULL);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 21667ba6dcd5..4db47790415c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -146,13 +146,10 @@ xfs_find_handle(
        if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
                xfs_inode_t     *ip;
-                bhv_desc_t      *bhv;
                int             lock_mode;
                /* need to get access to the xfs_inode to read the generation */
-                bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
+                ip = xfs_vtoi(vp);
-                ASSERT(bhv);
-                ip = XFS_BHVTOI(bhv);
                ASSERT(ip);
                lock_mode = xfs_ilock_map_shared(ip);
@@ -751,9 +748,8 @@ xfs_ioctl(
                        (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                        mp->m_rtdev_targp : mp->m_ddev_targp;
-                da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
+                da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-                /* The size dio will do in one go */
+                da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-                da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
                if (copy_to_user(arg, &da, sizeof(da)))
                        return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9b8ee3470ecc..4bd3d03b23ed 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -54,11 +54,46 @@
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/namei.h>
+#include <linux/security.h>
 #define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) ||       \
        (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))
 /*
+ * Get an XFS inode from a given vnode (NULL if the behavior lookup fails).
+ */
+xfs_inode_t *
+xfs_vtoi(
+        struct vnode    *vp)
+{
+        bhv_desc_t      *bdp;
+        bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
+                        VNODE_POSITION_XFS, VNODE_POSITION_XFS);
+        if (unlikely(bdp == NULL))
+                return NULL;
+        return XFS_BHVTOI(bdp);
+}
+/*
+ * Bring the atime in the XFS inode up to date.
+ * Used before logging the inode to disk or when the Linux inode goes away.
+ */
+void
+xfs_synchronize_atime(
+        xfs_inode_t     *ip)
+{
+        vnode_t         *vp;
+        vp = XFS_ITOV_NULL(ip);
+        if (vp) {
+                struct inode *inode = &vp->v_inode;
+                ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+                ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+        }
+}
+/*
 * Change the requested timestamp in the given inode.
 * We don't lock across timestamp updates, and we don't log them but
 * we do record the fact that there is dirty information in core.
@@ -77,23 +112,6 @@ xfs_ichgtime(
        struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));
        timespec_t      tv;
-        /*
-         * We're not supposed to change timestamps in readonly-mounted
-         * filesystems.  Throw it away if anyone asks us.
-         */
-        if (unlikely(IS_RDONLY(inode)))
-                return;
-        /*
-         * Don't update access timestamps on reads if mounted "noatime".
-         * Throw it away if anyone asks us.
-         */
-        if (unlikely(
-            (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-            (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-                        XFS_ICHGTIME_ACC))
-                return;
        nanotime(&tv);
        if (flags & XFS_ICHGTIME_MOD) {
                inode->i_mtime = tv;
@@ -130,8 +148,6 @@ xfs_ichgtime(
 * Variant on the above which avoids querying the system clock
 * in situations where we know the Linux inode timestamps have
 * just been updated (and so we can update our inode cheaply).
- * We also skip the readonly and noatime checks here, they are
- * also catered for already.
 */
 void
 xfs_ichgtime_fast(
@@ -142,20 +158,16 @@ xfs_ichgtime_fast(
        timespec_t      *tvp;
        /*
-         * We're not supposed to change timestamps in readonly-mounted
+         * Atime updates for read() & friends are handled lazily now, and
-         * filesystems.  Throw it away if anyone asks us.
+         * explicit updates must go through xfs_ichgtime().
         */
-        if (unlikely(IS_RDONLY(inode)))
+        ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-                return;
        /*
-         * Don't update access timestamps on reads if mounted "noatime".
+         * We're not supposed to change timestamps in readonly-mounted
-         * Throw it away if anyone asks us.
+         * filesystems.  Throw it away if anyone asks us.
         */
-        if (unlikely(
+        if (unlikely(IS_RDONLY(inode)))
-            (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-            ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-                        XFS_ICHGTIME_ACC)))
                return;
        if (flags & XFS_ICHGTIME_MOD) {
@@ -163,11 +175,6 @@ xfs_ichgtime_fast(
                ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
                ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
        }
-        if (flags & XFS_ICHGTIME_ACC) {
-                tvp = &inode->i_atime;
-                ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
-                ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
-        }
        if (flags & XFS_ICHGTIME_CHG) {
                tvp = &inode->i_ctime;
                ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -214,6 +221,39 @@ validate_fields(
 }
 /*
+ * Hook in SELinux.  This is not quite correct yet; what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+linvfs_init_security(
+        struct vnode    *vp,
+        struct inode    *dir)
+{
+        struct inode    *ip = LINVFS_GET_IP(vp);
+        size_t          length;
+        void            *value;
+        char            *name;
+        int             error;
+        error = security_inode_init_security(ip, dir, &name, &value, &length);
+        if (error) {
+                if (error == -EOPNOTSUPP)
+                        return 0;
+                return -error;
+        }
+        VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
+        if (!error)
+                VMODIFY(vp);
+        kfree(name);
+        kfree(value);
+        return error;
+}
+/*
 * Determine whether a process has a valid fs_struct (kernel daemons
 * like knfsd don't have an fs_struct).
 *
@@ -278,6 +318,9 @@ linvfs_mknod(
                break;
        }
+        if (!error)
+                error = linvfs_init_security(vp, dir);
        if (default_acl) {
                if (!error) {
                        error = _ACL_INHERIT(vp, &va, default_acl);
@@ -294,8 +337,6 @@ linvfs_mknod(
                                teardown.d_inode = ip = LINVFS_GET_IP(vp);
                                teardown.d_name = dentry->d_name;
-                                vn_mark_bad(vp);
-                                
                                if (S_ISDIR(mode))
                                        VOP_RMDIR(dvp, &teardown, NULL, err2);
                                else
@@ -506,7 +547,7 @@ linvfs_follow_link(
        ASSERT(dentry);
        ASSERT(nd);
-        link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+        link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
        if (!link) {
                nd_set_link(nd, ERR_PTR(-ENOMEM));
                return NULL;
@@ -522,12 +563,12 @@ linvfs_follow_link(
        vp = LINVFS_GET_VP(dentry->d_inode);
        iov.iov_base = link;
-        iov.iov_len = MAXNAMELEN;
+        iov.iov_len = MAXPATHLEN;
        uio->uio_iov = &iov;
        uio->uio_offset = 0;
        uio->uio_segflg = UIO_SYSSPACE;
-        uio->uio_resid = MAXNAMELEN;
+        uio->uio_resid = MAXPATHLEN;
        uio->uio_iovcnt = 1;
        VOP_READLINK(vp, uio, 0, NULL, error);
@@ -535,7 +576,7 @@ linvfs_follow_link(
                kfree(link);
                link = ERR_PTR(-error);
        } else {
-                link[MAXNAMELEN - uio->uio_resid] = '\0';
+                link[MAXPATHLEN - uio->uio_resid] = '\0';
        }
        kfree(uio);
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ee784b63acbf..6899a6b4a50a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
 extern struct file_operations linvfs_invis_file_operations;
 extern struct file_operations linvfs_dir_operations;
-extern struct address_space_operations linvfs_aops;
-extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void linvfs_unwritten_done(struct buffer_head *, int);
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                        int, unsigned int, void __user *);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d8e21ba0cccc..67389b745526 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -110,10 +110,6 @@
 * delalloc and these ondisk-uninitialised buffers.
 */
 BUFFER_FNS(PrivateStart, unwritten);
-static inline void set_buffer_unwritten_io(struct buffer_head *bh)
-{
-        bh->b_end_io = linvfs_unwritten_done;
-}
 #define restricted_chown        xfs_params.restrict_chown.val
 #define irix_sgid_inherit       xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_itruncate_data(ip, off)     \
        (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
 #define xfs_statvfs_fsid(statp, mp)     \
-        ({ u64 id = huge_encode_dev((mp)->m_dev);       \
+        ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
           __kernel_fsid_t *fsid = &(statp)->f_fsid;    \
        (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 885dfafeabee..e0ab45fbfebd 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -233,8 +233,8 @@ xfs_read(
                xfs_buftarg_t   *target =
                        (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
-                if ((*offset & target->pbr_smask) ||
+                if ((*offset & target->bt_smask) ||
-                    (size & target->pbr_smask)) {
+                    (size & target->bt_smask)) {
                        if (*offset == ip->i_d.di_size) {
                                return (0);
                        }
@@ -281,9 +281,6 @@ xfs_read(
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        if (likely(!(ioflags & IO_INVIS)))
-                xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
 unlock_isem:
        if (unlikely(ioflags & IO_ISDIRECT))
                mutex_unlock(&inode->i_mutex);
@@ -346,9 +343,6 @@ xfs_sendfile(
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
-        if (likely(!(ioflags & IO_INVIS)))
-                xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
        return ret;
 }
@@ -362,7 +356,6 @@ STATIC int				/* error (positive) */
 xfs_zero_last_block(
        struct inode    *ip,
        xfs_iocore_t    *io,
-        xfs_off_t       offset,
        xfs_fsize_t     isize,
        xfs_fsize_t     end_size)
 {
@@ -371,19 +364,16 @@ xfs_zero_last_block(
        int             nimaps;
        int             zero_offset;
        int             zero_len;
-        int             isize_fsb_offset;
        int             error = 0;
        xfs_bmbt_irec_t imap;
        loff_t          loff;
-        size_t          lsize;
        ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
-        ASSERT(offset > isize);
        mp = io->io_mount;
-        isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
+        zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-        if (isize_fsb_offset == 0) {
+        if (zero_offset == 0) {
                /*
                 * There are no extra bytes in the last block on disk to
                 * zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
         */
        XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
        loff = XFS_FSB_TO_B(mp, last_fsb);
-        lsize = XFS_FSB_TO_B(mp, 1);
-        zero_offset = isize_fsb_offset;
+        zero_len = mp->m_sb.sb_blocksize - zero_offset;
-        zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
        error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
@@ -447,20 +435,17 @@ xfs_zero_eof(
        struct inode    *ip = LINVFS_GET_IP(vp);
        xfs_fileoff_t   start_zero_fsb;
        xfs_fileoff_t   end_zero_fsb;
-        xfs_fileoff_t   prev_zero_fsb;
        xfs_fileoff_t   zero_count_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_extlen_t    buf_len_fsb;
-        xfs_extlen_t    prev_zero_count;
        xfs_mount_t     *mp;
        int             nimaps;
        int             error = 0;
        xfs_bmbt_irec_t imap;
-        loff_t          loff;
-        size_t          lsize;
        ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
        ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+        ASSERT(offset > isize);
        mp = io->io_mount;
@@ -468,7 +453,7 @@ xfs_zero_eof(
         * First handle zeroing the block on which isize resides.
         * We only zero a part of that block so it is handled specially.
         */
-        error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+        error = xfs_zero_last_block(ip, io, isize, end_size);
        if (error) {
                ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
                ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
        }
        ASSERT(start_zero_fsb <= end_zero_fsb);
-        prev_zero_fsb = NULLFILEOFF;
-        prev_zero_count = 0;
        while (start_zero_fsb <= end_zero_fsb) {
                nimaps = 1;
                zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
                         * that sits on a hole and sets the page as P_HOLE
                         * and calls remapf if it is a mapped file.
                         */
-                        prev_zero_fsb = NULLFILEOFF;
+                        start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-                        prev_zero_count = 0;
-                        start_zero_fsb = imap.br_startoff +
-                                         imap.br_blockcount;
                        ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
                        continue;
                }
@@ -543,17 +523,15 @@ xfs_zero_eof(
                 */
                XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-                loff = XFS_FSB_TO_B(mp, start_zero_fsb);
+                error = xfs_iozero(ip,
-                lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
+                                   XFS_FSB_TO_B(mp, start_zero_fsb),
+                                   XFS_FSB_TO_B(mp, buf_len_fsb),
-                error = xfs_iozero(ip, loff, lsize, end_size);
+                                   end_size);
                if (error) {
                        goto out_lock;
                }
-                prev_zero_fsb = start_zero_fsb;
-                prev_zero_count = buf_len_fsb;
                start_zero_fsb = imap.br_startoff + buf_len_fsb;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
@@ -640,7 +618,7 @@ xfs_write(
                        (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
-                if ((pos & target->pbr_smask) || (count & target->pbr_smask))
+                if ((pos & target->bt_smask) || (count & target->bt_smask))
                        return XFS_ERROR(-EINVAL);
                if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -831,6 +809,10 @@ retry:
                goto retry;
        }
+        isize = i_size_read(inode);
+        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+                *offset = isize;
        if (*offset > xip->i_d.di_size) {
                xfs_ilock(xip, XFS_ILOCK_EXCL);
                if (*offset > xip->i_d.di_size) {
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
        mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
        if (!XFS_FORCED_SHUTDOWN(mp)) {
-                pagebuf_iorequest(bp);
+                xfs_buf_iorequest(bp);
                return 0;
        } else {
                xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
                 * if (XFS_BUF_IS_GRIO(bp)) {
                 */
-                pagebuf_iorequest(bp);
+                xfs_buf_iorequest(bp);
                return 0;
        }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 6c40a74be7c8..8955720a2c6b 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -34,7 +34,7 @@ xfs_read_xfsstats(
        __uint64_t      xs_write_bytes = 0;
        __uint64_t      xs_read_bytes = 0;
-        static struct xstats_entry {
+        static const struct xstats_entry {
                char    *desc;
                int     endpoint;
        } xstats[] = {
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 50027c4a5618..8ba7a2fa6c1d 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -109,15 +109,15 @@ struct xfsstats {
        __uint32_t              vn_remove;      /* # times vn_remove called */
        __uint32_t              vn_free;        /* # times vn_free called */
 #define XFSSTAT_END_BUF                 (XFSSTAT_END_VNODE_OPS+9)
-        __uint32_t              pb_get;
+        __uint32_t              xb_get;
-        __uint32_t              pb_create;
+        __uint32_t              xb_create;
-        __uint32_t              pb_get_locked;
+        __uint32_t              xb_get_locked;
-        __uint32_t              pb_get_locked_waited;
+        __uint32_t              xb_get_locked_waited;
-        __uint32_t              pb_busy_locked;
+        __uint32_t              xb_busy_locked;
-        __uint32_t              pb_miss_locked;
+        __uint32_t              xb_miss_locked;
-        __uint32_t              pb_page_retries;
+        __uint32_t              xb_page_retries;
-        __uint32_t              pb_page_found;
+        __uint32_t              xb_page_found;
-        __uint32_t              pb_get_read;
+        __uint32_t              xb_get_read;
 /* Extra precision counters */
        __uint64_t              xs_xstrat_bytes;
        __uint64_t              xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 6116b5bf433e..f22e426d9e42 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, not supported with external log device");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
-        if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+        if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
                                        QUEUE_ORDERED_NONE) {
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, not supported by the underlying device");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
        error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, trial barrier write failed");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
 }
@@ -327,7 +330,7 @@ void
 xfs_blkdev_issue_flush(
        xfs_buftarg_t           *buftarg)
 {
-        blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+        blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
                timeleft = schedule_timeout_interruptible(timeleft);
                /* swsusp */
                try_to_freeze();
-                if (kthread_should_stop())
+                if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
                        break;
                spin_lock(&vfsp->vfs_sync_lock);
@@ -966,9 +969,9 @@ init_xfs_fs( void )
        if (error < 0)
                goto undo_zones;
-        error = pagebuf_init();
+        error = xfs_buf_init();
        if (error < 0)
-                goto undo_pagebuf;
+                goto undo_buffers;
        vn_init();
        xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
        return 0;
 undo_register:
-        pagebuf_terminate();
+        xfs_buf_terminate();
-undo_pagebuf:
+undo_buffers:
        linvfs_destroy_zones();
 undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
        XFS_DM_EXIT(&xfs_fs_type);
        unregister_filesystem(&xfs_fs_type);
        xfs_cleanup();
-        pagebuf_terminate();
+        xfs_buf_terminate();
        linvfs_destroy_zones();
        ktrace_uninit();
 }
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index e9bbcb4d6243..260dd8415dd7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -106,7 +106,6 @@ vn_revalidate_core(
        inode->i_blocks     = vap->va_nblocks;
        inode->i_mtime      = vap->va_mtime;
        inode->i_ctime      = vap->va_ctime;
-        inode->i_atime      = vap->va_atime;
        inode->i_blksize    = vap->va_blocksize;
        if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f2bbb327c081..0fe2419461d6 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp)
 }
 /*
+ * Extracting atime values in various formats
+ */
+static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
+{
+        bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
+        bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
+}
+static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
+{
+        *ts = vp->v_inode.i_atime;
+}
+static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
+{
+        *tt = vp->v_inode.i_atime.tv_sec;
+}
+/*
 * Some useful predicates.
 */
 #define VN_MAPPED(vp)   mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2f69822344e5..2ec6b441849c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
         * trying to duplicate our effort.
         */
        ASSERT(qip->qli_pushbuf_flag != 0);
-        ASSERT(qip->qli_push_owner == get_thread_id());
+        ASSERT(qip->qli_push_owner == current_pid());
        /*
         * If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
                        qip->qli_pushbuf_flag = 1;
                        ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
 #ifdef DEBUG
-                        qip->qli_push_owner = get_thread_id();
+                        qip->qli_push_owner = current_pid();
 #endif
                        /*
                         * The dquot is left locked.
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index bb6991a7a617..7dcdd0640c32 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1392,11 +1392,12 @@ xfs_qm_qino_alloc(
 {
        xfs_trans_t     *tp;
        int             error;
-        unsigned long s;
+        unsigned long   s;
        cred_t          zerocr;
+        xfs_inode_t     zeroino;
        int             committed;
-        tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
        if ((error = xfs_trans_reserve(tp,
                                      XFS_QM_QINOCREATE_SPACE_RES(mp),
                                      XFS_CREATE_LOG_RES(mp), 0,
@@ -1406,8 +1407,9 @@ xfs_qm_qino_alloc(
                return (error);
        }
        memset(&zerocr, 0, sizeof(zerocr));
+        memset(&zeroino, 0, sizeof(zeroino));
-        if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
+        if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
                                   &zerocr, 0, 1, ip, &committed))) {
                xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT);
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
         * at this point (because we intentionally didn't in dqget_noattach).
         */
        if (error) {
-                xfs_qm_dqpurge_all(mp,
+                xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
-                                   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-                                   XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
                goto error_return;
        }
        /*
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
                xfs_dqunlock(udqp);
                ASSERT(ip->i_udquot == NULL);
                ip->i_udquot = udqp;
+                ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
                ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
                xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
                xfs_dqunlock(gdqp);
                ASSERT(ip->i_gdquot == NULL);
                ip->i_gdquot = gdqp;
-                ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
+                ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
+                ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+                        ip->i_d.di_gid : ip->i_d.di_projid) ==
+                                be32_to_cpu(gdqp->q_core.d_id));
                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
 }
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index bb6dc91ea261..b08b3d9345b7 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -27,45 +27,12 @@ static DEFINE_SPINLOCK(xfs_err_lock);
 /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
 #define XFS_MAX_ERR_LEVEL       7
 #define XFS_ERR_MASK            ((1 << 3) - 1)
-static char             *err_level[XFS_MAX_ERR_LEVEL+1] =
+static const char * const       err_level[XFS_MAX_ERR_LEVEL+1] =
                                        {KERN_EMERG, KERN_ALERT, KERN_CRIT,
                                         KERN_ERR, KERN_WARNING, KERN_NOTICE,
                                         KERN_INFO, KERN_DEBUG};
 void
-assfail(char *a, char *f, int l)
-{
-    printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
-    BUG();
-}
-#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
-unsigned long
-random(void)
-{
-        static unsigned long    RandomValue = 1;
-        /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
-        register long   rv = RandomValue;
-        register long   lo;
-        register long   hi;
-        hi = rv / 127773;
-        lo = rv % 127773;
-        rv = 16807 * lo - 2836 * hi;
-        if( rv <= 0 ) rv += 2147483647;
-        return( RandomValue = rv );
-}
-int
-get_thread_id(void)
-{
-        return current->pid;
-}
-#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
-void
 cmn_err(register int level, char *fmt, ...)
 {
        char    *fp = fmt;
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
                BUG();
 }
 void
 icmn_err(register int level, char *fmt, va_list ap)
 {
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
        if (level == CE_PANIC)
                BUG();
 }
+void
+assfail(char *expr, char *file, int line)
+{
+        printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
+        BUG();
+}
+#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
+unsigned long random(void)
+{
+        static unsigned long    RandomValue = 1;
+        /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
+        register long   rv = RandomValue;
+        register long   lo;
+        register long   hi;
+        hi = rv / 127773;
+        lo = rv % 127773;
+        rv = 16807 * lo - 2836 * hi;
+        if (rv <= 0) rv += 2147483647;
+        return RandomValue = rv;
+}
+#endif /* (DEBUG || INDUCE_IO_ERRROR) && !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index aff558664c32..e3bf58112e7e 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
        __attribute__ ((format (printf, 2, 0)));
 extern void cmn_err(int, char *, ...)
        __attribute__ ((format (printf, 2, 3)));
+extern void assfail(char *expr, char *f, int l);
-#ifndef STATIC
+#define prdev(fmt,targ,args...) \
-# define STATIC static
+        printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-#endif
-#ifdef DEBUG
+#define ASSERT_ALWAYS(expr)     \
-# define ASSERT(EX)     ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
+        (likely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-#else
-# define ASSERT(x)      ((void)0)
-#endif
-extern void assfail(char *, char *, int);
+#ifndef DEBUG
-#ifdef DEBUG
+# define ASSERT(expr)   ((void)0)
+#else
+# define ASSERT(expr)   ASSERT_ALWAYS(expr)
 extern unsigned long random(void);
-extern int get_thread_id(void);
 #endif
-#define ASSERT_ALWAYS(EX)  ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__))
+#ifndef STATIC
-#define debug_stop_all_cpus(param)      /* param is "cpumask_t *" */
+# define STATIC static
+#endif
 #endif  /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 69ec4f540c3a..a3d565a67734 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -27,6 +27,16 @@ uuid_init(void)
        mutex_init(&uuid_monitor);
 }
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+        __be32  uu_timelow;
+        __be16  uu_timemid;
+        __be16  uu_timehi;
+        __be16  uu_clockseq;
+        __be16  uu_node[3];
+} xfs_uu_t;
 /*
 * uuid_getnodeuniq - obtain the node unique fields of a UUID.
 *
@@ -36,16 +46,11 @@ uuid_init(void)
 void
 uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 {
-        char    *uu = (char *)uuid;
+        xfs_uu_t *uup = (xfs_uu_t *)uuid;
-        /* on IRIX, this function assumes big-endian fields within
-         * the uuid, so we use INT_GET to get the same result on
-         * little-endian systems
-         */
-        fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +
+        fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                   INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);
+                   be16_to_cpu(uup->uu_timemid);
-        fsid[1] =  INT_GET(*(u_int32_t*)(uu  ), ARCH_CONVERT);
+        fsid[1] = be32_to_cpu(uup->uu_timelow);
 }
 void
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 68e5051d8e24..c4836890b726 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,6 +40,22 @@
 #undef XFS_NATIVE_HOST
 #endif
+#ifdef XFS_NATIVE_HOST
+#define cpu_to_be16(val)        ((__be16)(val))
+#define cpu_to_be32(val)        ((__be32)(val))
+#define cpu_to_be64(val)        ((__be64)(val))
+#define be16_to_cpu(val)        ((__uint16_t)(val))
+#define be32_to_cpu(val)        ((__uint32_t)(val))
+#define be64_to_cpu(val)        ((__uint64_t)(val))
+#else
+#define cpu_to_be16(val)        (__swab16((__uint16_t)(val)))
+#define cpu_to_be32(val)        (__swab32((__uint32_t)(val)))
+#define cpu_to_be64(val)        (__swab64((__uint64_t)(val)))
+#define be16_to_cpu(val)        (__swab16((__be16)(val)))
+#define be32_to_cpu(val)        (__swab32((__be32)(val)))
+#define be64_to_cpu(val)        (__swab64((__be64)(val)))
+#endif
 #endif  /* __KERNEL__ */
 /* do we need conversion? */
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
 */ 
 #define XFS_GET_DIR_INO4(di) \
-        (((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+        (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 #define XFS_PUT_DIR_INO4(from, di) \
 do { \
@@ -197,9 +213,9 @@ do { \
 } while (0)
 #define XFS_DI_HI(di) \
-        (((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+        (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 #define XFS_DI_LO(di) \
-        (((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
+        (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
 #define XFS_GET_DIR_INO8(di)        \
        (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1c7421840c18..fe91eac4e2a7 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
                return (offset >= minforkoff) ? minforkoff : 0;
        }
-        if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+        if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
                if (bytes <= XFS_IFORK_ASIZE(dp))
                        return mp->m_attroffset >> 3;
                return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
        unsigned long s;
-        if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+        if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
            !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
                s = XFS_SB_LOCK(mp);
                if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
         */
        totsize -= size;
        if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
-            !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+            (mp->m_flags & XFS_MOUNT_ATTR2)) {
                /*
                 * Last attribute now removed, revert to original
                 * inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
                dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
                ASSERT(dp->i_d.di_forkoff);
                ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
-                        (mp->m_flags & XFS_MOUNT_COMPAT_ATTR));
+                        !(mp->m_flags & XFS_MOUNT_ATTR2));
                dp->i_afp->if_ext_max =
                        XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
                dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
                                + name_loc->namelen
                                + INT_GET(name_loc->valuelen, ARCH_CONVERT);
        }
-        if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+        if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
            (bytes == sizeof(struct xfs_attr_sf_hdr)))
                return(-1);
        return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
                goto out;
        if (forkoff == -1) {
-                ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));
+                ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
                /*
                 * Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f6143ff251a0..541e34109bb9 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -63,7 +63,7 @@ struct xfs_trans;
 * the leaf_entry.  The namespaces are independent only because we also look
 * at the namespace bit when we are looking for a matching attribute name.
 *
- * We also store a "incomplete" bit in the leaf_entry.  It shows that an
+ * We also store an "incomplete" bit in the leaf_entry.  It shows that an
 * attribute is in the middle of being created and should not be shown to
 * the user if we crash during the time that the bit is set.  We clear the
 * bit when we have finished setting up the attribute.  We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
 */
 #define XFS_ATTR_LEAF_MAPSIZE   3       /* how many freespace slots */
+typedef struct xfs_attr_leaf_map {      /* RLE map of free bytes */
+        __uint16_t      base;           /* base of free region */
+        __uint16_t      size;           /* length of free region */
+} xfs_attr_leaf_map_t;
+typedef struct xfs_attr_leaf_hdr {      /* constant-structure header block */
+        xfs_da_blkinfo_t info;          /* block type, links, etc. */
+        __uint16_t      count;          /* count of active leaf_entry's */
+        __uint16_t      usedbytes;      /* num bytes of names/values stored */
+        __uint16_t      firstused;      /* first used byte in name area */
+        __uint8_t       holes;          /* != 0 if blk needs compaction */
+        __uint8_t       pad1;
+        xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
+                                        /* N largest free regions */
+} xfs_attr_leaf_hdr_t;
+typedef struct xfs_attr_leaf_entry {    /* sorted on key, not name */
+        xfs_dahash_t    hashval;        /* hash value of name */
+        __uint16_t      nameidx;        /* index into buffer of name/value */
+        __uint8_t       flags;          /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
+        __uint8_t       pad2;           /* unused pad byte */
+} xfs_attr_leaf_entry_t;
+typedef struct xfs_attr_leaf_name_local {
+        __uint16_t      valuelen;       /* number of bytes in value */
+        __uint8_t       namelen;        /* length of name bytes */
+        __uint8_t       nameval[1];     /* name/value bytes */
+} xfs_attr_leaf_name_local_t;
+typedef struct xfs_attr_leaf_name_remote {
+        xfs_dablk_t     valueblk;       /* block number of value bytes */
+        __uint32_t      valuelen;       /* number of bytes in value */
+        __uint8_t       namelen;        /* length of name bytes */
+        __uint8_t       name[1];        /* name bytes */
+} xfs_attr_leaf_name_remote_t;
 typedef struct xfs_attr_leafblock {
-        struct xfs_attr_leaf_hdr {      /* constant-structure header block */
+        xfs_attr_leaf_hdr_t     hdr;    /* constant-structure header block */
-                xfs_da_blkinfo_t info;  /* block type, links, etc. */
+        xfs_attr_leaf_entry_t   entries[1];     /* sorted on key, not name */
-                __uint16_t count;       /* count of active leaf_entry's */
+        xfs_attr_leaf_name_local_t namelist;    /* grows from bottom of buf */
-                __uint16_t usedbytes;   /* num bytes of names/values stored */
+        xfs_attr_leaf_name_remote_t valuelist;  /* grows from bottom of buf */
-                __uint16_t firstused;   /* first used byte in name area */
-                __uint8_t  holes;       /* != 0 if blk needs compaction */
-                __uint8_t  pad1;
-                struct xfs_attr_leaf_map {        /* RLE map of free bytes */
-                        __uint16_t base;          /* base of free region */
-                        __uint16_t size;          /* length of free region */
-                } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
-        } hdr;
-        struct xfs_attr_leaf_entry {    /* sorted on key, not name */
-                xfs_dahash_t hashval;   /* hash value of name */
-                __uint16_t nameidx;     /* index into buffer of name/value */
-                __uint8_t flags;        /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
-                __uint8_t pad2;         /* unused pad byte */
-        } entries[1];                   /* variable sized array */
-        struct xfs_attr_leaf_name_local {
-                __uint16_t valuelen;    /* number of bytes in value */
-                __uint8_t namelen;      /* length of name bytes */
-                __uint8_t nameval[1];   /* name/value bytes */
-        } namelist;                     /* grows from bottom of buf */
-        struct xfs_attr_leaf_name_remote {
-                xfs_dablk_t valueblk;   /* block number of value bytes */
-                __uint32_t valuelen;    /* number of bytes in value */
-                __uint8_t namelen;      /* length of name bytes */
-                __uint8_t name[1];      /* name bytes */
-        } valuelist;                    /* grows from bottom of buf */
 } xfs_attr_leafblock_t;
-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
 /*
 * Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
                (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
 }
-#define XFS_ATTR_LEAF_NAME(leafp,idx)           xfs_attr_leaf_name(leafp,idx)
+#define XFS_ATTR_LEAF_NAME(leafp,idx)           \
+        xfs_attr_leaf_name(leafp,idx)
 static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
 {
        return (&((char *)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9c..70625e577c70 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
        return 0; /* keep gcc quite */
 }
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ */
+STATIC int
+xfs_bmap_extsize_align(
+        xfs_mount_t     *mp,
+        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
+        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
+        xfs_extlen_t    extsz,          /* align to this extent size */
+        int             rt,             /* is this a realtime inode? */
+        int             eof,            /* is extent at end-of-file? */
+        int             delay,          /* creating delalloc extent? */
+        int             convert,        /* overwriting unwritten extent? */
+        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
+        xfs_extlen_t    *lenp)          /* in/out: aligned length */
+{
+        xfs_fileoff_t   orig_off;       /* original offset */
+        xfs_extlen_t    orig_alen;      /* original length */
+        xfs_fileoff_t   orig_end;       /* original off+len */
+        xfs_fileoff_t   nexto;          /* next file offset */
+        xfs_fileoff_t   prevo;          /* previous file offset */
+        xfs_fileoff_t   align_off;      /* temp for offset */
+        xfs_extlen_t    align_alen;     /* temp for length */
+        xfs_extlen_t    temp;           /* temp for calculations */
+        if (convert)
+                return 0;
+        orig_off = align_off = *offp;
+        orig_alen = align_alen = *lenp;
+        orig_end = orig_off + orig_alen;
+        /*
+         * If this request overlaps an existing extent, then don't
+         * attempt to perform any additional alignment.
+         */
+        if (!delay && !eof &&
+            (orig_off >= gotp->br_startoff) &&
+            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+                return 0;
+        }
+        /*
+         * If the file offset is unaligned vs. the extent size
+         * we need to align it.  This will be possible unless
+         * the file was previously written with a kernel that didn't
+         * perform this alignment, or if a truncate shot us in the
+         * foot.
+         */
+        temp = do_mod(orig_off, extsz);
+        if (temp) {
+                align_alen += temp;
+                align_off -= temp;
+        }
+        /*
+         * Same adjustment for the end of the requested area.
+         */
+        if ((temp = (align_alen % extsz))) {
+                align_alen += extsz - temp;
+        }
+        /*
+         * If the previous block overlaps with this proposed allocation
+         * then move the start forward without adjusting the length.
+         */
+        if (prevp->br_startoff != NULLFILEOFF) {
+                if (prevp->br_startblock == HOLESTARTBLOCK)
+                        prevo = prevp->br_startoff;
+                else
+                        prevo = prevp->br_startoff + prevp->br_blockcount;
+        } else
+                prevo = 0;
+        if (align_off != orig_off && align_off < prevo)
+                align_off = prevo;
+        /*
+         * If the next block overlaps with this proposed allocation
+         * then move the start back without adjusting the length,
+         * but not before offset 0.
+         * This may of course make the start overlap previous block,
+         * and if we hit the offset 0 limit then the next block
+         * can still overlap too.
+         */
+        if (!eof && gotp->br_startoff != NULLFILEOFF) {
+                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+                        nexto = gotp->br_startoff + gotp->br_blockcount;
+                else
+                        nexto = gotp->br_startoff;
+        } else
+                nexto = NULLFILEOFF;
+        if (!eof &&
+            align_off + align_alen != orig_end &&
+            align_off + align_alen > nexto)
+                align_off = nexto > align_alen ? nexto - align_alen : 0;
+        /*
+         * If we're now overlapping the next or previous extent that
+         * means we can't fit an extsz piece in this hole.  Just move
+         * the start forward to the first valid spot and set
+         * the length so we hit the end.
+         */
+        if (align_off != orig_off && align_off < prevo)
+                align_off = prevo;
+        if (align_off + align_alen != orig_end &&
+            align_off + align_alen > nexto &&
+            nexto != NULLFILEOFF) {
+                ASSERT(nexto > prevo);
+                align_alen = nexto - align_off;
+        }
+        /*
+         * If realtime, and the result isn't a multiple of the realtime
+         * extent size we need to remove blocks until it is.
+         */
+        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+                /*
+                 * We're not covering the original request, or
+                 * we won't be able to once we fix the length.
+                 */
+                if (orig_off < align_off ||
+                    orig_end > align_off + align_alen ||
+                    align_alen - temp < orig_alen)
+                        return XFS_ERROR(EINVAL);
+                /*
+                 * Try to fix it by moving the start up.
+                 */
+                if (align_off + temp <= orig_off) {
+                        align_alen -= temp;
+                        align_off += temp;
+                }
+                /*
+                 * Try to fix it by moving the end in.
+                 */
+                else if (align_off + align_alen - temp >= orig_end)
+                        align_alen -= temp;
+                /*
+                 * Set the start to the minimum then trim the length.
+                 */
+                else {
+                        align_alen -= orig_off - align_off;
+                        align_off = orig_off;
+                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
+                }
+                /*
+                 * Result doesn't cover the request, fail it.
+                 */
+                if (orig_off < align_off || orig_end > align_off + align_alen)
+                        return XFS_ERROR(EINVAL);
+        } else {
+                ASSERT(orig_off >= align_off);
+                ASSERT(orig_end <= align_off + align_alen);
+        }
+#ifdef DEBUG
+        if (!eof && gotp->br_startoff != NULLFILEOFF)
+                ASSERT(align_off + align_alen <= gotp->br_startoff);
+        if (prevp->br_startoff != NULLFILEOFF)
+                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+        *lenp = align_alen;
+        *offp = align_off;
+        return 0;
+}
 #define XFS_ALLOC_GAP_UNITS     4
 /*
 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
 * It figures out where to ask the underlying allocator to put the new extent.
 */
-STATIC int                              /* error */
+STATIC int
 xfs_bmap_alloc(
        xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
 {
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
        xfs_mount_t     *mp;            /* mount point structure */
        int             nullfb;         /* true if ap->firstblock isn't set */
        int             rt;             /* true if inode is realtime */
-#ifdef __KERNEL__
+        xfs_extlen_t    prod = 0;       /* product factor for allocators */
-        xfs_extlen_t    prod=0;         /* product factor for allocators */
+        xfs_extlen_t    ralen = 0;      /* realtime allocation length */
-        xfs_extlen_t    ralen=0;        /* realtime allocation length */
+        xfs_extlen_t    align;          /* minimum allocation alignment */
-#endif
+        xfs_rtblock_t   rtx;
 #define ISVALID(x,y)    \
        (rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
        nullfb = ap->firstblock == NULLFSBLOCK;
        rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
        if (rt) {
-                xfs_extlen_t    extsz;          /* file extent size for rt */
+                align = ap->ip->i_d.di_extsize ?
-                xfs_fileoff_t   nexto;          /* next file offset */
+                        ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
-                xfs_extlen_t    orig_alen;      /* original ap->alen */
+                /* Set prod to match the extent size */
-                xfs_fileoff_t   orig_end;       /* original off+len */
+                prod = align / mp->m_sb.sb_rextsize;
-                xfs_fileoff_t   orig_off;       /* original ap->off */
-                xfs_extlen_t    mod_off;        /* modulus calculations */
+                error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
-                xfs_fileoff_t   prevo;          /* previous file offset */
+                                                align, rt, ap->eof, 0,
-                xfs_rtblock_t   rtx;            /* realtime extent number */
+                                                ap->conv, &ap->off, &ap->alen);
-                xfs_extlen_t    temp;           /* temp for rt calculations */
+                if (error)
+                        return error;
-                /*
+                ASSERT(ap->alen);
-                 * Set prod to match the realtime extent size.
-                 */
-                if (!(extsz = ap->ip->i_d.di_extsize))
-                        extsz = mp->m_sb.sb_rextsize;
-                prod = extsz / mp->m_sb.sb_rextsize;
-                orig_off = ap->off;
-                orig_alen = ap->alen;
-                orig_end = orig_off + orig_alen;
-                /*
-                 * If the file offset is unaligned vs. the extent size
-                 * we need to align it.  This will be possible unless
-                 * the file was previously written with a kernel that didn't
-                 * perform this alignment.
-                 */
-                mod_off = do_mod(orig_off, extsz);
-                if (mod_off) {
-                        ap->alen += mod_off;
-                        ap->off -= mod_off;
-                }
-                /*
-                 * Same adjustment for the end of the requested area.
-                 */
-                if ((temp = (ap->alen % extsz)))
-                        ap->alen += extsz - temp;
-                /*
-                 * If the previous block overlaps with this proposed allocation
-                 * then move the start forward without adjusting the length.
-                 */
-                prevo =
-                        ap->prevp->br_startoff == NULLFILEOFF ?
-                                0 :
-                                (ap->prevp->br_startoff +
-                                 ap->prevp->br_blockcount);
-                if (ap->off != orig_off && ap->off < prevo)
-                        ap->off = prevo;
-                /*
-                 * If the next block overlaps with this proposed allocation
-                 * then move the start back without adjusting the length,
-                 * but not before offset 0.
-                 * This may of course make the start overlap previous block,
-                 * and if we hit the offset 0 limit then the next block
-                 * can still overlap too.
-                 */
-                nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
-                        NULLFILEOFF : ap->gotp->br_startoff;
-                if (!ap->eof &&
-                    ap->off + ap->alen != orig_end &&
-                    ap->off + ap->alen > nexto)
-                        ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
-                /*
-                 * If we're now overlapping the next or previous extent that
-                 * means we can't fit an extsz piece in this hole.  Just move
-                 * the start forward to the first valid spot and set
-                 * the length so we hit the end.
-                 */
-                if ((ap->off != orig_off && ap->off < prevo) ||
-                    (ap->off + ap->alen != orig_end &&
-                     ap->off + ap->alen > nexto)) {
-                        ap->off = prevo;
-                        ap->alen = nexto - prevo;
-                }
-                /*
-                 * If the result isn't a multiple of rtextents we need to
-                 * remove blocks until it is.
-                 */
-                if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
-                        /*
-                         * We're not covering the original request, or
-                         * we won't be able to once we fix the length.
-                         */
-                        if (orig_off < ap->off ||
-                            orig_end > ap->off + ap->alen ||
-                            ap->alen - temp < orig_alen)
-                                return XFS_ERROR(EINVAL);
-                        /*
-                         * Try to fix it by moving the start up.
-                         */
-                        if (ap->off + temp <= orig_off) {
-                                ap->alen -= temp;
-                                ap->off += temp;
-                        }
-                        /*
-                         * Try to fix it by moving the end in.
-                         */
-                        else if (ap->off + ap->alen - temp >= orig_end)
-                                ap->alen -= temp;
-                        /*
-                         * Set the start to the minimum then trim the length.
-                         */
-                        else {
-                                ap->alen -= orig_off - ap->off;
-                                ap->off = orig_off;
-                                ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
-                        }
-                        /*
-                         * Result doesn't cover the request, fail it.
-                         */
-                        if (orig_off < ap->off || orig_end > ap->off + ap->alen)
-                                return XFS_ERROR(EINVAL);
-                }
                ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
                /*
                 * If the offset & length are not perfectly aligned
                 * then kill prod, it will just get us in trouble.
                 */
-                if (do_mod(ap->off, extsz) || ap->alen % extsz)
+                if (do_mod(ap->off, align) || ap->alen % align)
                        prod = 1;
                /*
                 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
                        ap->rval = rtx * mp->m_sb.sb_rextsize;
                } else
                        ap->rval = 0;
+        } else {
+                align = (ap->userdata && ap->ip->i_d.di_extsize &&
+                        (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+                        ap->ip->i_d.di_extsize : 0;
+                if (unlikely(align)) {
+                        error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+                                                        align, rt,
+                                                        ap->eof, 0, ap->conv,
+                                                        &ap->off, &ap->alen);
+                        ASSERT(!error);
+                        ASSERT(ap->alen);
+                }
+                if (nullfb)
+                        ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+                else
+                        ap->rval = ap->firstblock;
        }
-#else
-        if (rt)
-                ap->rval = 0;
-#endif  /* __KERNEL__ */
-        else if (nullfb)
-                ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-        else
-                ap->rval = ap->firstblock;
        /*
         * If allocating at eof, and there's a previous real block,
         * try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
                        args.total = ap->total;
                        args.minlen = ap->minlen;
                }
-                if (ap->ip->i_d.di_extsize) {
+                if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+                            (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
                        args.prod = ap->ip->i_d.di_extsize;
                        if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
                                args.mod = (xfs_extlen_t)(args.prod - args.mod);
-                } else if (mp->m_sb.sb_blocksize >= NBPP) {
+                } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
                        args.prod = 1;
                        args.mod = 0;
                } else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
        ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
                                          lastxp, gotp, prevp);
-        rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;
+        rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-        if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {
+        if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
                cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
                        "start_block : %llx start_off : %llx blkcnt : %llx "
                        "extent-state : %x \n",
-                        (ip->i_mount)->m_fsname,(long long)ip->i_ino,
+                        (ip->i_mount)->m_fsname, (long long)ip->i_ino,
-                        gotp->br_startblock, gotp->br_startoff,
+                        (unsigned long long)gotp->br_startblock,
-                        gotp->br_blockcount,gotp->br_state);
+                        (unsigned long long)gotp->br_startoff,
+                        (unsigned long long)gotp->br_blockcount,
+                        gotp->br_state);
        }
        return ep;
 }
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
                ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
                if (!ip->i_d.di_forkoff)
                        ip->i_d.di_forkoff = mp->m_attroffset >> 3;
-                else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+                else if (mp->m_flags & XFS_MOUNT_ATTR2)
                        version = 2;
                break;
        default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
         */
        if (whichfork == XFS_DATA_FORK) {
                maxleafents = MAXEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
-                        mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+                        XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
        } else {
                maxleafents = MAXAEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
-                        mp->m_sb.sb_inodesize - mp->m_attroffset :
+                        XFS_BMDR_SPACE_CALC(MINABTPTRS) :
-                        XFS_BMDR_SPACE_CALC(MINABTPTRS);
+                        mp->m_sb.sb_inodesize - mp->m_attroffset;
        }
        maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
        minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
                num_recs = be16_to_cpu(block->bb_numrecs);
                if (unlikely(i + num_recs > room)) {
                        ASSERT(i + num_recs <= room);
-                        xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                        xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                "corrupt dinode %Lu, (btree extents).  Unmount and run xfs_repair.",
+                                "corrupt dinode %Lu, (btree extents).",
                                (unsigned long long) ip->i_ino);
                        XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
                                         XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
        char            contig;         /* allocation must be one extent */
        char            delay;          /* this request is for delayed alloc */
        char            exact;          /* don't do all of wasdelayed extent */
+        char            convert;        /* unwritten extent I/O completion */
        xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
        int             error;          /* error return */
        xfs_bmbt_irec_t got;            /* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
        }
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        rt = XFS_IS_REALTIME_INODE(ip);
+        rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(ifp->if_ext_max ==
               XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
        delay = (flags & XFS_BMAPI_DELAY) != 0;
        trim = (flags & XFS_BMAPI_ENTIRE) == 0;
        userdata = (flags & XFS_BMAPI_METADATA) == 0;
+        convert = (flags & XFS_BMAPI_CONVERT) != 0;
        exact = (flags & XFS_BMAPI_EXACT) != 0;
        rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
        contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
                        }
                        minlen = contig ? alen : 1;
                        if (delay) {
-                                xfs_extlen_t    extsz = 0;
+                                xfs_extlen_t    extsz;
                                /* Figure out the extent size, adjust alen */
                                if (rt) {
                                        if (!(extsz = ip->i_d.di_extsize))
                                                extsz = mp->m_sb.sb_rextsize;
-                                        alen = roundup(alen, extsz);
+                                } else {
-                                        extsz = alen / mp->m_sb.sb_rextsize;
+                                        extsz = ip->i_d.di_extsize;
                                }
+                                if (extsz) {
+                                        error = xfs_bmap_extsize_align(mp,
+                                                        &got, &prev, extsz,
+                                                        rt, eof, delay, convert,
+                                                        &aoff, &alen);
+                                        ASSERT(!error);
+                                }
+                                if (rt)
+                                        extsz = alen / mp->m_sb.sb_rextsize;
                                /*
                                 * Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
                                        xfs_bmap_worst_indlen(ip, alen);
                                ASSERT(indlen > 0);
-                                if (rt)
+                                if (rt) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FREXTENTS,
                                                        -(extsz), rsvd);
-                                else
+                                } else {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(alen), rsvd);
+                                }
                                if (!error) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(indlen), rsvd);
-                                        if (error && rt) {
+                                        if (error && rt)
-                                                xfs_mod_incore_sb(ip->i_mount,
+                                                xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FREXTENTS,
                                                        extsz, rsvd);
-                                        } else if (error) {
+                                        else if (error)
-                                                xfs_mod_incore_sb(ip->i_mount,
+                                                xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        alen, rsvd);
-                                        }
                                }
                                if (error) {
-                                        if (XFS_IS_QUOTA_ON(ip->i_mount))
+                                        if (XFS_IS_QUOTA_ON(mp))
                                                /* unreserve the blocks now */
+                                                (void)
                                                XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
                                                        mp, NULL, ip,
                                                        (long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
                                bma.firstblock = *firstblock;
                                bma.alen = alen;
                                bma.off = aoff;
+                                bma.conv = convert;
                                bma.wasdel = wasdelay;
                                bma.minlen = minlen;
                                bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
                return 0;
        }
        XFS_STATS_INC(xs_blk_unmap);
-        isrt = (whichfork == XFS_DATA_FORK) &&
+        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-               (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
        start = bno;
        bno = start + len - 1;
        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
                }
                if (wasdel) {
                        ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-                        /* Update realtim/data freespace, unreserve quota */
+                        /* Update realtime/data freespace, unreserve quota */
                        if (isrt) {
                                xfs_filblks_t rtexts;
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
                                do_div(rtexts, mp->m_sb.sb_rextsize);
                                xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
                                                (int)rtexts, rsvd);
-                                XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+                                (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-                                        -((long)del.br_blockcount), 0,
+                                        NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
                                xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
                                                (int)del.br_blockcount, rsvd);
-                                XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+                                (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-                                        -((long)del.br_blockcount), 0,
+                                        NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
                        }
                        ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
                   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
                return XFS_ERROR(EINVAL);
        if (whichfork == XFS_DATA_FORK) {
-                if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+                if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+                                (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+                    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
                        prealloced = 1;
                        fixlen = XFS_MAXIOFFSET(mp);
                } else {
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a01309..12cc63dfc2c4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef	struct xfs_bmap_free
 #define XFS_BMAPI_IGSTATE       0x200   /* Ignore state - */
                                        /* combine contig. space */
 #define XFS_BMAPI_CONTIG        0x400   /* must allocate only one extent */
+/*      XFS_BMAPI_DIRECT_IO     0x800   */
+#define XFS_BMAPI_CONVERT       0x1000  /* unwritten extent conversion - */
+                                        /* need write cache flushing and no */
+                                        /* additional allocation alignments */
 #define XFS_BMAPI_AFLAG(w)      xfs_bmapi_aflag(w)
 static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
        char                    wasdel; /* replacing a delayed allocation */
        char                    userdata;/* set if is user data */
        char                    low;    /* low on space, using seq'l ags */
-        char                    aeof;   /* allocated space at eof */
+        char                    aeof;   /* allocated space at eof */
+        char                    conv;   /* overwriting unwritten extents */
 } xfs_bmalloca_t;
 #ifdef __KERNEL__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 328a528b926d..f57cc9ac875e 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -57,7 +57,7 @@ struct xfs_mount_args {
 /*
 * XFS mount option flags -- args->flags1
 */
-#define XFSMNT_COMPAT_ATTR      0x00000001      /* do not use ATTR2 format */
+#define XFSMNT_ATTR2            0x00000001      /* allow ATTR2 EA format */
 #define XFSMNT_WSYNC            0x00000002      /* safe mode nfs mount
                                                 * compatible */
 #define XFSMNT_INO64            0x00000004      /* move inode numbers up
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 070259a4254c..c6191d00ad27 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -60,8 +60,6 @@ xfs_swapext(
        xfs_bstat_t     *sbp;
        struct file     *fp = NULL, *tfp = NULL;
        vnode_t         *vp, *tvp;
-        bhv_desc_t      *bdp, *tbdp;
-        vn_bhv_head_t   *bhp, *tbhp;
        static uint     lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
        int             ilf_fields, tilf_fields;
        int             error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
                goto error0;
        }
-        bhp = VN_BHV_HEAD(vp);
+        ip = xfs_vtoi(vp);
-        bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
+        if (ip == NULL) {
-        if (bdp == NULL) {
                error = XFS_ERROR(EBADF);
                goto error0;
-        } else {
-                ip = XFS_BHVTOI(bdp);
        }
        if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
                goto error0;
        }
-        tbhp = VN_BHV_HEAD(tvp);
+        tip = xfs_vtoi(tvp);
-        tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
+        if (tip == NULL) {
-        if (tbdp == NULL) {
                error = XFS_ERROR(EBADF);
                goto error0;
-        } else {
-                tip = XFS_BHVTOI(tbdp);
        }
        if (ip->i_mount != tip->i_mount) {
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1a..79d0d9e1fbab 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
 #define XFS_DFORK_DSIZE(dip,mp) \
        XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_DSIZE_HOST(dip,mp) \
+        XFS_CFORK_DSIZE(&(dip)->di_core, mp)
 #define XFS_DFORK_ASIZE(dip,mp) \
        XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_ASIZE_HOST(dip,mp) \
+        XFS_CFORK_ASIZE(&(dip)->di_core, mp)
 #define XFS_DFORK_SIZE(dip,mp,w) \
        XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
+#define XFS_DFORK_SIZE_HOST(dip,mp,w) \
+        XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
 #define XFS_DFORK_Q(dip)                    XFS_CFORK_Q_DISK(&(dip)->di_core)
 #define XFS_DFORK_BOFF(dip)                 XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_CFORK_FMT_SET(dcp,w,n) \
        ((w) == XFS_DATA_FORK ? \
                ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
+#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
 #define XFS_CFORK_NEXTENTS_DISK(dcp,w) \
        ((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
                INT_GET((dcp)->di_anextents, ARCH_CONVERT))
 #define XFS_CFORK_NEXTENTS(dcp,w) \
        ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
+#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
+#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
 #define XFS_CFORK_NEXT_SET(dcp,w,n) \
        ((w) == XFS_DATA_FORK ? \
                ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
-#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
 #define XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)XFS_BUF_PTR(bp))
 /*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOATIME_BIT   6      /* do not update atime */
 #define XFS_DIFLAG_NODUMP_BIT    7      /* do not dump */
 #define XFS_DIFLAG_RTINHERIT_BIT 8      /* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT  9   /* create with parents projid */
+#define XFS_DIFLAG_PROJINHERIT_BIT   9  /* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT  10   /* disallow symlink creation */
+#define XFS_DIFLAG_NOSYMLINKS_BIT   10  /* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT      11  /* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12  /* inherit inode extent size */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_RTINHERIT     (1 << XFS_DIFLAG_RTINHERIT_BIT)
 #define XFS_DIFLAG_PROJINHERIT   (1 << XFS_DIFLAG_PROJINHERIT_BIT)
 #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
         XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
-         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+         XFS_DIFLAG_EXTSZINHERIT)
 #endif  /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 3dd30391f551..bb87d2a700a9 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
        uint shortcount, leafcount, count;
        mp->m_dirversion = 1;
-        if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) {
+        if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
                shortcount = (mp->m_attroffset -
                                (uint)sizeof(xfs_dir_sf_hdr_t)) /
                                 (uint)sizeof(xfs_dir_sf_entry_t);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 488defe86ba6..8cc8afb9f6c0 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -135,6 +135,8 @@ void	xfs_dir_startup(void);	/* called exactly once */
        ((mp)->m_dirops.xd_shortform_to_single(args))
 #define XFS_DIR_IS_V1(mp)       ((mp)->m_dirversion == 1)
+#define XFS_DIR_IS_V2(mp)       ((mp)->m_dirversion == 2)
 extern xfs_dirops_t xfsv1_dirops;
+extern xfs_dirops_t xfsv2_dirops;
 #endif  /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7e24ffeda9e1..3158f5dc431f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
        struct uio      *uio;           /* uio control structure */
 } xfs_dir2_put_args_t;
-#define XFS_DIR_IS_V2(mp)       ((mp)->m_dirversion == 2)
-extern xfs_dirops_t     xfsv2_dirops;
 /*
 * Other interfaces used by the rest of the dir v2 code.
 */
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index ab6b09eef9ab..eb8cd9a4667f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -67,34 +67,38 @@ struct xfs_trans;
 */
 #define XFS_DIR_LEAF_MAPSIZE    3       /* how many freespace slots */
+typedef struct xfs_dir_leaf_map {       /* RLE map of free bytes */
+        __uint16_t      base;           /* base of free region */
+        __uint16_t      size;           /* run length of free region */
+} xfs_dir_leaf_map_t;
+typedef struct xfs_dir_leaf_hdr {       /* constant-structure header block */
+        xfs_da_blkinfo_t info;          /* block type, links, etc. */
+        __uint16_t      count;          /* count of active leaf_entry's */
+        __uint16_t      namebytes;      /* num bytes of name strings stored */
+        __uint16_t      firstused;      /* first used byte in name area */
+        __uint8_t       holes;          /* != 0 if blk needs compaction */
+        __uint8_t       pad1;
+        xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
+} xfs_dir_leaf_hdr_t;
+typedef struct xfs_dir_leaf_entry {     /* sorted on key, not name */
+        xfs_dahash_t    hashval;        /* hash value of name */
+        __uint16_t      nameidx;        /* index into buffer of name */
+        __uint8_t       namelen;        /* length of name string */
+        __uint8_t       pad2;
+} xfs_dir_leaf_entry_t;
+typedef struct xfs_dir_leaf_name {
+        xfs_dir_ino_t   inumber;        /* inode number for this key */
+        __uint8_t       name[1];        /* name string itself (var sized) */
+} xfs_dir_leaf_name_t;
 typedef struct xfs_dir_leafblock {
-        struct xfs_dir_leaf_hdr {       /* constant-structure header block */
+        xfs_dir_leaf_hdr_t      hdr;    /* constant-structure header block */
-                xfs_da_blkinfo_t info;  /* block type, links, etc. */
+        xfs_dir_leaf_entry_t    entries[1];     /* var sized array */
-                __uint16_t count;       /* count of active leaf_entry's */
+        xfs_dir_leaf_name_t     namelist[1];    /* grows from bottom of buf */
-                __uint16_t namebytes;   /* num bytes of name strings stored */
-                __uint16_t firstused;   /* first used byte in name area */
-                __uint8_t  holes;       /* != 0 if blk needs compaction */
-                __uint8_t  pad1;
-                struct xfs_dir_leaf_map {/* RLE map of free bytes */
-                        __uint16_t base; /* base of free region */
-                        __uint16_t size; /* run length of free region */
-                } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
-        } hdr;
-        struct xfs_dir_leaf_entry {     /* sorted on key, not name */
-                xfs_dahash_t hashval;   /* hash value of name */
-                __uint16_t nameidx;     /* index into buffer of name */
-                __uint8_t namelen;      /* length of name string */
-                __uint8_t pad2;
-        } entries[1];                   /* var sized array */
-        struct xfs_dir_leaf_name {
-                xfs_dir_ino_t inumber;  /* inode number for this key */
-                __uint8_t name[1];      /* name string itself */
-        } namelist[1];                  /* grows from bottom of buf */
 } xfs_dir_leafblock_t;
-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
 /*
 * Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
 #define XFS_PUT_COOKIE(c,mp,bno,entry,hash)     \
        ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
-typedef struct xfs_dir_put_args
+typedef struct xfs_dir_put_args {
-{
        xfs_dircook_t   cook;           /* cookie of (next) entry */
        xfs_intino_t    ino;            /* inode number */
-        struct xfs_dirent       *dbp;           /* buffer pointer */
+        struct xfs_dirent *dbp;         /* buffer pointer */
        char            *name;          /* directory entry name */
        int             namelen;        /* length of name */
        int             done;           /* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
        struct uio      *uio;           /* uio control structure */
 } xfs_dir_put_args_t;
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)        xfs_dir_leaf_entsize_byname(len)
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)        \
+        xfs_dir_leaf_entsize_byname(len)
 static inline int xfs_dir_leaf_entsize_byname(int len)
 {
        return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index d7b6b5d16704..2a21c5024017 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -54,7 +54,6 @@ xfs_error_trap(int e)
                if (e != xfs_etrap[i])
                        continue;
                cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
-                debug_stop_all_cpus((void *)-1LL);
                BUG();
                break;
        }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 06d8a8426c16..26b8e709a569 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,9 +18,6 @@
 #ifndef __XFS_ERROR_H__
 #define __XFS_ERROR_H__
-#define prdev(fmt,targ,args...) \
-        printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 #define XFS_ERECOVER    1       /* Failure to recover log */
 #define XFS_ELOGSTAT    2       /* Failure to stat log in user space */
 #define XFS_ENOLOGSPACE 3       /* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
 struct xfs_mount;
 /* PRINTFLIKE4 */
 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
-                            char *fmt, ...);
+                        char *fmt, ...);
 /* PRINTFLIKE3 */
 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) /*
+ * Forwards to xfs_fs_cmn_err() with "  Unmount and run xfs_repair."
+ * appended to the message.  fmt is placed directly against that string
+ * literal, so callers must pass a string literal for fmt (adjacent
+ * literals are concatenated); a char * variable will not compile here.
+ */ \
+        xfs_fs_cmn_err(level, mp, fmt "  Unmount and run xfs_repair.", ## args)
 #endif  /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48d..14010f1fa82f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -3,15 +3,15 @@
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
+ * modify it under the terms of the GNU Lesser General Public License
- * published by the Free Software Foundation.
+ * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * GNU Lesser General Public License for more details.
 *
- * You should have received a copy of the GNU General Public License
+ * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
@@ -65,6 +65,8 @@ struct fsxattr {
 #define XFS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
 #define XFS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
 #define XFS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d1236d6f4045..163031c1e394 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -540,6 +540,32 @@ xfs_reserve_blocks(
        return(0);
 }
+void
+xfs_fs_log_dummy(xfs_mount_t *mp)
+{       /*
+         * Log the core of the root inode (mp->m_rootip) in a transaction
+         * that is flagged with xfs_trans_set_sync() before commit.
+         * Returns nothing: if the log reservation fails the transaction is
+         * cancelled and the function returns without logging anything.
+         */
+        xfs_trans_t *tp;
+        xfs_inode_t *ip;
+        tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+        atomic_inc(&mp->m_active_trans);        /* presumably not counted by
+                                                 * _xfs_trans_alloc() itself
+                                                 * - TODO confirm */
+        if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
+                xfs_trans_cancel(tp, 0);        /* reservation failed */
+                return;
+        }
+        ip = mp->m_rootip;
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+        xfs_trans_ihold(tp, ip);        /* presumably keeps ip locked across
+                                         * the commit; it is unlocked
+                                         * explicitly below - confirm */
+        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+        xfs_trans_set_sync(tp);
+        xfs_trans_commit(tp, 0, NULL);  /* return value is not checked */
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
 int
 xfs_fs_goingdown(
        xfs_mount_t     *mp,
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f32713f14f9a..300d0c9d61ad 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
                                xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern void xfs_fs_log_dummy(xfs_mount_t *mp);
 #endif  /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fc19eedbd11b..8e380a1fb79b 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -493,7 +493,6 @@ xfs_iget(
 retry:
        if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
-                bhv_desc_t      *bdp;
                xfs_inode_t     *ip;
                vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
                         * to wait for the inode to go away.
                         */
                        if (is_bad_inode(inode) ||
-                            ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
+                            ((ip = xfs_vtoi(vp)) == NULL)) {
-                                                  &xfs_vnodeops)) == NULL)) {
                                iput(inode);
                                delay(1);
                                goto retry;
                        }
-                        ip = XFS_BHVTOI(bdp);
                        if (lock_flags != 0)
                                xfs_ilock(ip, lock_flags);
                        XFS_STATS_INC(xs_ig_found);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70a..1d7f5a7e063e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -404,9 +404,8 @@ xfs_iformat(
            INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
                INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
            INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt dinode %Lu, extent total = %d, nblocks = %Lu."
+                        "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
-                        "  Unmount and run xfs_repair.",
                        (unsigned long long)ip->i_ino,
                        (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
                            + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
        }
        if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt dinode %Lu, forkoff = 0x%x."
+                        "corrupt dinode %Lu, forkoff = 0x%x.",
-                        "  Unmount and run xfs_repair.",
                        (unsigned long long)ip->i_ino,
                        (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
                XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
                         * no local regular files yet
                         */
                        if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
-                                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                        "corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+                                        "corrupt inode %Lu "
+                                        "(local format for regular file).",
                                        (unsigned long long) ip->i_ino);
                                XFS_CORRUPTION_ERROR("xfs_iformat(4)",
                                                     XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
                        di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
                        if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-                                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                        "corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+                                        "corrupt inode %Lu "
+                                        "(bad size %Ld for local inode).",
                                        (unsigned long long) ip->i_ino,
                                        (long long) di_size);
                                XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
         * kmem_alloc() or memcpy() below.
         */
        if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu "
+                        "(bad size %d for local fork, size = %d).",
                        (unsigned long long) ip->i_ino, size,
                        XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
                XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
         * kmem_alloc() or memcpy() below.
         */
        if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu ((a)extents = %d).",
                        (unsigned long long) ip->i_ino, nex);
                XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
                                     ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
            || XFS_BMDR_SPACE_CALC(nrecs) >
                        XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
            || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu (btree).",
                        (unsigned long long) ip->i_ino);
                XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
                                 ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
                        flags |= XFS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
                        flags |= XFS_XFLAG_NOSYMLINKS;
+                if (di_flags & XFS_DIFLAG_EXTSIZE)
+                        flags |= XFS_XFLAG_EXTSIZE;
+                if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+                        flags |= XFS_XFLAG_EXTSZINHERIT;
        }
        return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
                        if ((mode & S_IFMT) == S_IFDIR) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
-                        } else {
+                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                                        ip->i_d.di_extsize = pip->i_d.di_extsize;
+                                }
+                        } else if ((mode & S_IFMT) == S_IFREG) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                }
+                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                        di_flags |= XFS_DIFLAG_EXTSIZE;
+                                        ip->i_d.di_extsize = pip->i_d.di_extsize;
+                                }
                        }
                        if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
                            xfs_inherit_noatime)
@@ -1262,7 +1275,7 @@ xfs_isize_check(
        if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
                return;
-        if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+        if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
                return;
        nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
        xfs_fsize_t     new_size,
        cred_t          *credp)
 {
-        xfs_fsize_t     isize;
        int             error;
        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
        ASSERT(new_size > ip->i_d.di_size);
-        error = 0;
-        isize = ip->i_d.di_size;
        /*
         * Zero any pages that may have been created by
         * xfs_write_file() beyond the end of the file
         * and any blocks between the old and new file sizes.
         */
-        error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
+        error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
-                                new_size);
+                             ip->i_d.di_size, new_size);
        return error;
 }
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
        ip->i_update_core = 0;
        SYNCHRONIZE();
+        /*
+         * Make sure to get the latest atime from the Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
                               mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
                xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 124d30e6143b..1cfbcf18ce86 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -436,6 +436,10 @@ void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t     xfs_file_last_byte(xfs_inode_t *);
 void            xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+xfs_inode_t     *xfs_vtoi(struct vnode *vp);
+void            xfs_synchronize_atime(xfs_inode_t *);
 #define xfs_ipincount(ip)       ((unsigned int) atomic_read(&ip->i_pincount))
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7f3363c621e1..36aa1fcb90a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -271,6 +271,11 @@ xfs_inode_item_format(
        if (ip->i_update_size)
                ip->i_update_size = 0;
+        /*
+         * Make sure to get the latest atime from the Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        vecp->i_addr = (xfs_caddr_t)&ip->i_d;
        vecp->i_len  = sizeof(xfs_dinode_core_t);
        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
                if (iip->ili_pushbuf_flag == 0) {
                        iip->ili_pushbuf_flag = 1;
 #ifdef DEBUG
-                        iip->ili_push_owner = get_thread_id();
+                        iip->ili_push_owner = current_pid();
 #endif
                        /*
                         * Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
         * trying to duplicate our effort.
         */
        ASSERT(iip->ili_pushbuf_flag != 0);
-        ASSERT(iip->ili_push_owner == get_thread_id());
+        ASSERT(iip->ili_push_owner == current_pid());
        /*
         * If flushlock isn't locked anymore, chances are that the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ca7afc83a893..788917f355c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -262,7 +262,7 @@ phase2:
        case BMAPI_WRITE:
                /* If we found an extent, return it */
                if (nimaps &&
-                    (imap.br_startblock != HOLESTARTBLOCK) && 
+                    (imap.br_startblock != HOLESTARTBLOCK) &&
                    (imap.br_startblock != DELAYSTARTBLOCK)) {
                        xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
                                        offset, count, iomapp, &imap, flags);
@@ -317,6 +317,58 @@ out:
 }
 STATIC int
+xfs_iomap_eof_align_last_fsb(
+        xfs_mount_t     *mp,
+        xfs_iocore_t    *io,
+        xfs_fsize_t     isize,
+        xfs_extlen_t    extsize,
+        xfs_fileoff_t   *last_fsb)
+{       /*
+         * Round the end-of-allocation block *last_fsb up to an alignment
+         * boundary chosen from the mount's stripe geometry and/or extsize.
+         * isize (bytes) is compared against the stripe width/unit to decide
+         * whether stripe rounding applies.  The rounded value is written
+         * back to *last_fsb only when XFS_BMAP_EOF() sets eof for it;
+         * otherwise *last_fsb is left unchanged.
+         * Returns 0, or the error returned by XFS_BMAP_EOF().
+         */
+        xfs_fileoff_t   new_last_fsb = 0;       /* 0 == no rounding chosen */
+        xfs_extlen_t    align;
+        int             eof, error;
+        if (io->io_flags & XFS_IOCORE_RT)
+                ;       /* XFS_IOCORE_RT set: skip both stripe roundings */
+        /*
+         * If mounted with the "-o swalloc" option, roundup the allocation
+         * request to a stripe width boundary if the file size is >=
+         * stripe width and we are allocating past the allocation eof.
+         */
+        else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+                (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+                new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
+        /*
+         * Roundup the allocation request to a stripe unit (m_dalign) boundary
+         * if the file size is >= stripe unit size, and we are allocating past
+         * the allocation eof.
+         */
+        else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+                new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+        /*
+         * Always round up the allocation request to an extent boundary
+         * (when file on a real-time subvolume or has di_extsize hint).
+         *
+         * NOTE(review): when a stripe rounding was applied, align is set
+         * to the stripe-rounded block offset rounded up to extsize, i.e.
+         * it holds a file offset rather than an alignment size, and it is
+         * stored in an xfs_extlen_t while new_last_fsb is an
+         * xfs_fileoff_t.  If xfs_extlen_t is the narrower type this could
+         * truncate for very large offsets - confirm against the typedefs.
+         */
+        if (extsize) {
+                if (new_last_fsb)
+                        align = roundup_64(new_last_fsb, extsize);
+                else
+                        align = extsize;
+                new_last_fsb = roundup_64(*last_fsb, align);
+        }
+        if (new_last_fsb) {
+                error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+                if (error)
+                        return error;
+                if (eof)        /* only adopt the rounded end when eof is set */
+                        *last_fsb = new_last_fsb;
+        }
+        return 0;
+}
+STATIC int
 xfs_flush_space(
        xfs_inode_t     *ip,
        int             *fsynced,
@@ -362,19 +414,20 @@ xfs_iomap_write_direct(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-        xfs_filblks_t   count_fsb;
+        xfs_filblks_t   count_fsb, resaligned;
        xfs_fsblock_t   firstfsb;
+        xfs_extlen_t    extsz, temp;
+        xfs_fsize_t     isize;
        int             nimaps;
-        int             error;
        int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        xfs_bmap_free_t free_list;
-        xfs_filblks_t   qblocks, resblks;
+        uint            qblocks, resblks, resrtextents;
        int             committed;
-        int             resrtextents;
+        int             error;
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -384,37 +437,52 @@ xfs_iomap_write_direct(
        if (error)
                return XFS_ERROR(error);
-        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+        rt = XFS_IS_REALTIME_INODE(ip);
-        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+        if (unlikely(rt)) {
-        count_fsb = last_fsb - offset_fsb;
+                if (!(extsz = ip->i_d.di_extsize))
-        if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
+                        extsz = mp->m_sb.sb_rextsize;
-                xfs_fileoff_t   map_last_fsb;
+        } else {
+                extsz = ip->i_d.di_extsize;
-                map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-                if (map_last_fsb < last_fsb) {
-                        last_fsb = map_last_fsb;
-                        count_fsb = last_fsb - offset_fsb;
-                }
-                ASSERT(count_fsb > 0);
        }
-        /*
+        isize = ip->i_d.di_size;
-         * Determine if reserving space on the data or realtime partition.
+        if (io->io_new_size > isize)
-         */
+                isize = io->io_new_size;
-        if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-                xfs_extlen_t    extsz;
-                if (!(extsz = ip->i_d.di_extsize))
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
-                        extsz = mp->m_sb.sb_rextsize;
+        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-                resrtextents = qblocks = (count_fsb + extsz - 1);
+        if ((offset + count) > isize) {
-                do_div(resrtextents, mp->m_sb.sb_rextsize);
+                error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                                                        &last_fsb);
-                quota_flag = XFS_QMOPT_RES_RTBLKS;
+                if (error)
+                        goto error_out;
        } else {
-                resrtextents = 0;
+                if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
-                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
+                        last_fsb = MIN(last_fsb, (xfs_fileoff_t)
-                quota_flag = XFS_QMOPT_RES_REGBLKS;
+                                        ret_imap->br_blockcount +
+                                        ret_imap->br_startoff);
        }
+        count_fsb = last_fsb - offset_fsb;
+        ASSERT(count_fsb > 0);
+        resaligned = count_fsb;
+        if (unlikely(extsz)) {
+                if ((temp = do_mod(offset_fsb, extsz)))
+                        resaligned += temp;
+                if ((temp = do_mod(resaligned, extsz)))
+                        resaligned += extsz - temp;
+        }
+        if (unlikely(rt)) {
+                resrtextents = qblocks = resaligned;
+                resrtextents /= mp->m_sb.sb_rextsize;
+                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                quota_flag = XFS_QMOPT_RES_RTBLKS;
+        } else {
+                resrtextents = 0;
+                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+                quota_flag = XFS_QMOPT_RES_REGBLKS;
+        }
        /*
         * Allocate and setup the transaction
@@ -425,7 +493,6 @@ xfs_iomap_write_direct(
                        XFS_WRITE_LOG_RES(mp), resrtextents,
                        XFS_TRANS_PERM_LOG_RES,
                        XFS_WRITE_LOG_COUNT);
        /*
         * Check for running out of space, note: need lock to return
         */
@@ -435,20 +502,20 @@ xfs_iomap_write_direct(
        if (error)
                goto error_out;
-        if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
+        error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-                error = (EDQUOT);
+                                              qblocks, 0, quota_flag);
+        if (error)
                goto error1;
-        }
-        bmapi_flag = XFS_BMAPI_WRITE;
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);
-        if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+        bmapi_flag = XFS_BMAPI_WRITE;
+        if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
                bmapi_flag |= XFS_BMAPI_PREALLOC;
        /*
-         * Issue the bmapi() call to allocate the blocks
+         * Issue the xfs_bmapi() call to allocate the blocks
         */
        XFS_BMAP_INIT(&free_list, &firstfsb);
        nimaps = 1;
@@ -483,8 +550,10 @@ xfs_iomap_write_direct(
                        "extent-state : %x \n",
                        (ip->i_mount)->m_fsname,
                        (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_startblock,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_blockcount,
+                        ret_imap->br_state);
        }
        return 0;
@@ -500,6 +569,63 @@ error_out:
        return XFS_ERROR(error);
 }
+/*
+ * If the caller is doing a write at the end of the file,
+ * then extend the allocation out to the file system's write
+ * iosize.  We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ *
+ * This helper only makes the decision; it does not allocate.
+ * *prealloc is set to 1 when BMAPI_SYNC is not set in ioflag, the
+ * write ends beyond isize, and no mapping found in the scanned range
+ * (starting at the block holding the last byte of the write) has a
+ * start block other than HOLESTARTBLOCK or DELAYSTARTBLOCK.  In every
+ * other non-error case *prealloc is 0.
+ *
+ * imap/nimaps is caller-supplied scratch space for the lookups.
+ * Returns 0, or the error returned by XFS_BMAPI() (in which case
+ * *prealloc has already been cleared to 0).
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+        xfs_mount_t     *mp,
+        xfs_iocore_t    *io,
+        xfs_fsize_t     isize,
+        xfs_off_t       offset,
+        size_t          count,
+        int             ioflag,
+        xfs_bmbt_irec_t *imap,
+        int             nimaps,
+        int             *prealloc)
+{
+        xfs_fileoff_t   start_fsb;
+        xfs_filblks_t   count_fsb;
+        xfs_fsblock_t   firstblock;
+        int             n, error, imaps;
+        *prealloc = 0;
+        if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+                return 0;       /* sync write, or write ends within isize */
+        /*
+         * If there are any real blocks past eof, then don't
+         * do any speculative allocation.
+         */
+        start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+        while (count_fsb > 0) {
+                imaps = nimaps; /* reset each pass; XFS_BMAPI() updates it */
+                firstblock = NULLFSBLOCK;
+                error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+                                  0, &firstblock, 0, imap, &imaps, NULL);
+                if (error)
+                        return error;
+                /*
+                 * NOTE(review): the loop only advances by the mappings
+                 * returned; this presumably relies on XFS_BMAPI() always
+                 * returning at least one mapping with a non-zero
+                 * br_blockcount - confirm.
+                 */
+                for (n = 0; n < imaps; n++) {
+                        if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+                            (imap[n].br_startblock != DELAYSTARTBLOCK))
+                                return 0;       /* *prealloc stays 0 */
+                        start_fsb += imap[n].br_blockcount;
+                        count_fsb -= imap[n].br_blockcount;
+                }
+        }
+        *prealloc = 1;
+        return 0;
+}
 int
 xfs_iomap_write_delay(
        xfs_inode_t     *ip,
@@ -513,13 +639,15 @@ xfs_iomap_write_delay(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-        xfs_fsize_t     isize;
+        xfs_off_t       aligned_offset;
+        xfs_fileoff_t   ioalign;
        xfs_fsblock_t   firstblock;
+        xfs_extlen_t    extsz;
+        xfs_fsize_t     isize;
        int             nimaps;
-        int             error;
        xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-        int             aeof;
+        int             prealloc, fsynced = 0;
-        int             fsynced = 0;
+        int             error;
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
@@ -527,152 +655,57 @@ xfs_iomap_write_delay(
         * Make sure that the dquots are there. This doesn't hold
         * the ilock across a disk read.
         */
        error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
        if (error)
                return XFS_ERROR(error);
+        if (XFS_IS_REALTIME_INODE(ip)) {
+                if (!(extsz = ip->i_d.di_extsize))
+                        extsz = mp->m_sb.sb_rextsize;
+        } else {
+                extsz = ip->i_d.di_extsize;
+        }
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 retry:
        isize = ip->i_d.di_size;
-        if (io->io_new_size > isize) {
+        if (io->io_new_size > isize)
                isize = io->io_new_size;
-        }
-        aeof = 0;
+        error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
-        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+                                ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
-        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+        if (error)
-        /*
+                return error;
-         * If the caller is doing a write at the end of the file,
-         * then extend the allocation (and the buffer used for the write)
-         * out to the file system's write iosize.  We clean up any extra
-         * space left over when the file is closed in xfs_inactive().
-         *
-         * For sync writes, we are flushing delayed allocate space to
-         * try to make additional space available for allocation near
-         * the filesystem full boundary - preallocation hurts in that
-         * situation, of course.
-         */
-        if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
-                xfs_off_t       aligned_offset;
-                xfs_filblks_t   count_fsb;
-                unsigned int    iosize;
-                xfs_fileoff_t   ioalign;
-                int             n;
-                xfs_fileoff_t   start_fsb;
-                /*
+        if (prealloc) {
-                 * If there are any real blocks past eof, then don't
-                 * do any speculative allocation.
-                 */
-                start_fsb = XFS_B_TO_FSBT(mp,
-                                        ((xfs_ufsize_t)(offset + count - 1)));
-                count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-                while (count_fsb > 0) {
-                        nimaps = XFS_WRITE_IMAPS;
-                        error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
-                                        0, &firstblock, 0, imap, &nimaps, NULL);
-                        if (error) {
-                                return error;
-                        }
-                        for (n = 0; n < nimaps; n++) {
-                                if ( !(io->io_flags & XFS_IOCORE_RT)  && 
-                                        !imap[n].br_startblock) {
-                                        cmn_err(CE_PANIC,"Access to block "
-                                                "zero:  fs <%s> inode: %lld "
-                                                "start_block : %llx start_off "
-                                                ": %llx blkcnt : %llx "
-                                                "extent-state : %x \n",
-                                                (ip->i_mount)->m_fsname,
-                                                (long long)ip->i_ino,
-                                                imap[n].br_startblock,
-                                                imap[n].br_startoff,
-                                                imap[n].br_blockcount,
-                                                imap[n].br_state);
-                                }
-                                if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
-                                    (imap[n].br_startblock != DELAYSTARTBLOCK)) {
-                                        goto write_map;
-                                }
-                                start_fsb += imap[n].br_blockcount;
-                                count_fsb -= imap[n].br_blockcount;
-                        }
-                }
-                iosize = mp->m_writeio_blocks;
                aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-                last_fsb = ioalign + iosize;
+                last_fsb = ioalign + mp->m_writeio_blocks;
-                aeof = 1;
+        } else {
+                last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        }
-write_map:
-        nimaps = XFS_WRITE_IMAPS;
-        firstblock = NULLFSBLOCK;
-        /*
+        if (prealloc || extsz) {
-         * If mounted with the "-o swalloc" option, roundup the allocation
+                error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-         * request to a stripe width boundary if the file size is >=
+                                                        &last_fsb);
-         * stripe width and we are allocating past the allocation eof.
+                if (error)
-         */
-        if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 
-            && (mp->m_flags & XFS_MOUNT_SWALLOC)
-            && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
-                error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
-                        return error;
-                }
-                if (eof) {
-                        last_fsb = new_last_fsb;
-                }
-        /*
-         * Roundup the allocation request to a stripe unit (m_dalign) boundary
-         * if the file size is >= stripe unit size, and we are allocating past
-         * the allocation eof.
-         */
-        } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
-                   (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
-                error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
-                        return error;
-                }
-                if (eof) {
-                        last_fsb = new_last_fsb;
-                }
-        /*
-         * Round up the allocation request to a real-time extent boundary
-         * if the file is on the real-time subvolume.
-         */
-        } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
-                error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
                        return error;
-                }
-                if (eof)
-                        last_fsb = new_last_fsb;
        }
+        nimaps = XFS_WRITE_IMAPS;
+        firstblock = NULLFSBLOCK;
        error = xfs_bmapi(NULL, ip, offset_fsb,
                          (xfs_filblks_t)(last_fsb - offset_fsb),
                          XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
                          XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
                          &nimaps, NULL);
-        /*
+        if (error && (error != ENOSPC))
-         * This can be EDQUOT, if nimaps == 0
-         */
-        if (error && (error != ENOSPC)) {
                return XFS_ERROR(error);
-        }
        /*
         * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-         * then we must have run out of space.
+         * then we must have run out of space - flush delalloc, and retry..
         */
        if (nimaps == 0) {
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -684,17 +717,21 @@ write_map:
                goto retry;
        }
-        *ret_imap = imap[0];
+        /*
+         * Sanity-check the extent that xfs_bmapi() just handed back in
+         * imap[0].  ret_imap is only filled in further down, so it must
+         * not be examined here: at this point it still holds whatever
+         * the caller left in it.
+         */
+        if (!(io->io_flags & XFS_IOCORE_RT)  && !imap[0].br_startblock) {
-        *nmaps = 1;
-        if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
                cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
                        "start_block : %llx start_off : %llx blkcnt : %llx "
                        "extent-state : %x \n",
                        (ip->i_mount)->m_fsname,
                        (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
+                        (unsigned long long)imap[0].br_startblock,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)imap[0].br_startoff,
+                        (unsigned long long)imap[0].br_blockcount,
+                        imap[0].br_state);
        }
+        /* hand the single delayed-allocation mapping back to the caller */
+        *ret_imap = imap[0];
+        *nmaps = 1;
        return 0;
 }
@@ -820,17 +857,21 @@ xfs_iomap_write_allocate(
                 */
                for (i = 0; i < nimaps; i++) {
-                        if ( !(io->io_flags & XFS_IOCORE_RT)  && 
+                        if (!(io->io_flags & XFS_IOCORE_RT)  &&
-                                !imap[i].br_startblock) {
+                            !imap[i].br_startblock) {
                                cmn_err(CE_PANIC,"Access to block zero:  "
                                        "fs <%s> inode: %lld "
-                                        "start_block : %llx start_off : %llx " 
+                                        "start_block : %llx start_off : %llx "
                                        "blkcnt : %llx extent-state : %x \n",
                                        (ip->i_mount)->m_fsname,
                                        (long long)ip->i_ino,
-                                        imap[i].br_startblock,
+                                        (unsigned long long)
-                                        imap[i].br_startoff,
+                                                imap[i].br_startblock,
-                                        imap[i].br_blockcount,imap[i].br_state);
+                                        (unsigned long long)
+                                                imap[i].br_startoff,
+                                        (unsigned long long)
+                                                imap[i].br_blockcount,
+                                        imap[i].br_state);
                        }
                        if ((offset_fsb >= imap[i].br_startoff) &&
                            (offset_fsb < (imap[i].br_startoff +
@@ -867,17 +908,17 @@ xfs_iomap_write_unwritten(
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_iocore_t    *io = &ip->i_iocore;
-        xfs_trans_t     *tp;
        xfs_fileoff_t   offset_fsb;
        xfs_filblks_t   count_fsb;
        xfs_filblks_t   numblks_fsb;
-        xfs_bmbt_irec_t imap;
+        xfs_fsblock_t   firstfsb;
+        int             nimaps;
+        xfs_trans_t     *tp;
+        xfs_bmbt_irec_t imap;
+        xfs_bmap_free_t free_list;
+        uint            resblks;
        int             committed;
        int             error;
-        int             nres;
-        int             nimaps;
-        xfs_fsblock_t   firstfsb;
-        xfs_bmap_free_t free_list;
        xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
                                &ip->i_iocore, offset, count);
@@ -886,9 +927,9 @@ xfs_iomap_write_unwritten(
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
-        do {
+        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
-                nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+        do {
                /*
                 * set up a transaction to convert the range of extents
                 * from unwritten to real. Do allocations in a loop until
@@ -896,7 +937,7 @@ xfs_iomap_write_unwritten(
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
-                error = xfs_trans_reserve(tp, nres,
+                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);
@@ -915,7 +956,7 @@ xfs_iomap_write_unwritten(
                XFS_BMAP_INIT(&free_list, &firstfsb);
                nimaps = 1;
                error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-                                  XFS_BMAPI_WRITE, &firstfsb,
+                                  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
                                  1, &imap, &nimaps, &free_list);
                if (error)
                        goto error_on_bmapi_transaction;
@@ -929,15 +970,17 @@ xfs_iomap_write_unwritten(
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        goto error0;
-                
                if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
                        cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
                                "inode: %lld start_block : %llx start_off : "
                                "%llx blkcnt : %llx extent-state : %x \n",
                                (ip->i_mount)->m_fsname,
                                (long long)ip->i_ino,
-                                imap.br_startblock,imap.br_startoff,
+                                (unsigned long long)imap.br_startblock,
-                                imap.br_blockcount,imap.br_state);
+                                (unsigned long long)imap.br_startoff,
+                                (unsigned long long)imap.br_blockcount,
+                                imap.br_state);
                }
                if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f63646ead816..c59450e1be40 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
 {
        xfs_dinode_core_t *dic;         /* dinode core info pointer */
        xfs_inode_t     *ip;            /* incore inode pointer */
+        vnode_t         *vp;
        int             error;
        error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
                goto out_iput;
        }
+        vp = XFS_ITOV(ip);
        dic = &ip->i_d;
        /* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
        buf->bs_size = dic->di_size;
-        buf->bs_atime.tv_sec = dic->di_atime.t_sec;
+        vn_atime_to_bstime(vp, &buf->bs_atime);
-        buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
        buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
        buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
        buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 29af51275ca9..3d9a36e77363 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
 #define xlog_trace_iclog(iclog,state)
 #endif /* XFS_LOG_TRACE */
+/*
+ * Link a ticket onto a circular, doubly linked ticket queue.  The ticket
+ * goes in just ahead of the current head, i.e. at the end of the queue;
+ * when the queue is empty the ticket becomes the head and points at
+ * itself in both directions.  The ticket is flagged XLOG_TIC_IN_Q.
+ */
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+        struct xlog_ticket      *head = *qp;
+        if (head == NULL) {
+                /* first entry: a one element ring */
+                tic->t_next = tic;
+                tic->t_prev = tic;
+                *qp = tic;
+        } else {
+                struct xlog_ticket      *tail = head->t_prev;
+                /* splice in between the old tail and the head */
+                tic->t_next = head;
+                tic->t_prev = tail;
+                tail->t_next = tic;
+                head->t_prev = tic;
+        }
+        tic->t_flags |= XLOG_TIC_IN_Q;
+}
+/*
+ * Unlink a ticket from a circular, doubly linked ticket queue.  If the
+ * ticket is the only entry the queue becomes empty; otherwise the queue
+ * head is moved to the ticket's successor and the neighbours are joined
+ * up.  The ticket's links are cleared and XLOG_TIC_IN_Q is dropped.
+ */
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+        struct xlog_ticket      *succ = tic->t_next;
+        if (succ != tic) {
+                struct xlog_ticket      *pred = tic->t_prev;
+                /* the head always ends up at the removed ticket's successor */
+                *qp = succ;
+                succ->t_prev = pred;
+                pred->t_next = succ;
+        } else {
+                /* ticket was alone on the queue */
+                *qp = NULL;
+        }
+        tic->t_prev = NULL;
+        tic->t_next = NULL;
+        tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+/*
+ * Move both the reserve and the write grant heads back by the given
+ * number of bytes.  A byte count that drops below zero is wrapped by
+ * adding the log size, and the matching cycle number is decremented.
+ */
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+        /* reserve head */
+        log->l_grant_reserve_bytes -= bytes;
+        if (log->l_grant_reserve_bytes < 0) {
+                log->l_grant_reserve_cycle--;
+                log->l_grant_reserve_bytes += log->l_logsize;
+        }
+        /* write head */
+        log->l_grant_write_bytes -= bytes;
+        if (log->l_grant_write_bytes < 0) {
+                log->l_grant_write_cycle--;
+                log->l_grant_write_bytes += log->l_logsize;
+        }
+}
+/*
+ * Advance the write grant head by the given number of bytes.  Once the
+ * byte count exceeds the log size, one log size is subtracted from it
+ * and the write cycle number is incremented.
+ */
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+        log->l_grant_write_bytes += bytes;
+        if (log->l_grant_write_bytes <= log->l_logsize)
+                return;
+        /* went past the end of the log: wrap into the next cycle */
+        log->l_grant_write_cycle++;
+        log->l_grant_write_bytes -= log->l_logsize;
+}
+/*
+ * Advance the reserve grant head by the given number of bytes.  Once the
+ * byte count exceeds the log size, one log size is subtracted from it
+ * and the reserve cycle number is incremented.
+ */
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+        log->l_grant_reserve_bytes += bytes;
+        if (log->l_grant_reserve_bytes <= log->l_logsize)
+                return;
+        /* went past the end of the log: wrap into the next cycle */
+        log->l_grant_reserve_cycle++;
+        log->l_grant_reserve_bytes -= log->l_logsize;
+}
+/*
+ * Advance both grant heads by the same number of bytes: the write head
+ * via xlog_grant_add_space_write() and then the reserve head via
+ * xlog_grant_add_space_reserve().
+ */
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+        xlog_grant_add_space_write(log, bytes);
+        xlog_grant_add_space_reserve(log, bytes);
+}
 /*
 * NOTES:
 *
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t	*mp,
                if (readonly)
                        vfsp->vfs_flag &= ~VFS_RDONLY;
-                error = xlog_recover(mp->m_log, readonly);
+                error = xlog_recover(mp->m_log);
                if (readonly)
                        vfsp->vfs_flag |= VFS_RDONLY;
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t		*log,
        /* move grant heads by roundoff in sync */
        s = GRANT_LOCK(log);
-        XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
+        xlog_grant_add_space(log, roundoff);
-        XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
        GRANT_UNLOCK(log, s);
        /* put cycle number in every block */
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t		*log,
 * print out info relating to regions written which consume
 * the reservation
 */
-#if defined(XFS_LOG_RES_DEBUG)
 STATIC void
 xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 {
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
                        ticket->t_res_arr_sum, ticket->t_res_o_flow,
                        ticket->t_res_num_ophdrs, ophdr_spc,
                        ticket->t_res_arr_sum + 
-                          ticket->t_res_o_flow + ophdr_spc,
+                        ticket->t_res_o_flow + ophdr_spc,
                        ticket->t_res_num);
        for (i = 0; i < ticket->t_res_num; i++) {
-                uint r_type = ticket->t_res_arr[i].r_type; 
+                uint r_type = ticket->t_res_arr[i].r_type; 
                cmn_err(CE_WARN,
                            "region[%u]: %s - %u bytes\n",
                            i, 
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
                            ticket->t_res_arr[i].r_len);
        }
 }
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif
 /*
 * Write some region out to in-core log
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t	   *log,
        /* something is already sleeping; insert new transaction at end */
        if (log->l_reserve_headq) {
-                XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+                xlog_ins_ticketq(&log->l_reserve_headq, tic);
                xlog_trace_loggrant(log, tic,
                                    "xlog_grant_log_space: sleep 1");
                /*
@@ -2422,7 +2494,7 @@ redo:
                                     log->l_grant_reserve_bytes);
        if (free_bytes < need_bytes) {
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                        XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+                        xlog_ins_ticketq(&log->l_reserve_headq, tic);
                xlog_trace_loggrant(log, tic,
                                    "xlog_grant_log_space: sleep 2");
                XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
                s = GRANT_LOCK(log);
                goto redo;
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_reserve_headq, tic);
        /* we've got enough space */
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
+        xlog_grant_add_space(log, need_bytes);
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
 #ifdef DEBUG
        tail_lsn = log->l_tail_lsn;
        /*
@@ -2464,7 +2535,7 @@ redo:
 error_return:
        if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_reserve_headq, tic);
        xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
        /*
         * If we are failing, make sure the ticket doesn't have any
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,
                if (ntic != log->l_write_headq) {
                        if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                                XLOG_INS_TICKETQ(log->l_write_headq, tic);
+                                xlog_ins_ticketq(&log->l_write_headq, tic);
                        xlog_trace_loggrant(log, tic,
                                    "xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
                                     log->l_grant_write_bytes);
        if (free_bytes < need_bytes) {
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                        XLOG_INS_TICKETQ(log->l_write_headq, tic);
+                        xlog_ins_ticketq(&log->l_write_headq, tic);
                XFS_STATS_INC(xs_sleep_logspace);
                sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
@@ -2581,9 +2652,10 @@ redo:
                s = GRANT_LOCK(log);
                goto redo;
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+                xlog_del_ticketq(&log->l_write_headq, tic);
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+        /* we've got enough space */
+        xlog_grant_add_space_write(log, need_bytes);
 #ifdef DEBUG
        tail_lsn = log->l_tail_lsn;
        if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2600,7 +2672,7 @@ redo:
 error_return:
        if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_reserve_headq, tic);
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
        /*
         * If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
                ticket->t_cnt--;
        s = GRANT_LOCK(log);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
+        xlog_grant_sub_space(log, ticket->t_curr_res);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
        ticket->t_curr_res = ticket->t_unit_res;
        XLOG_TIC_RESET_RES(ticket);
        xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
                return;
        }
-        XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+        xlog_grant_add_space_reserve(log, ticket->t_unit_res);
        xlog_trace_loggrant(log, ticket,
                            "xlog_regrant_reserve_log_space: exit");
        xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
        s = GRANT_LOCK(log);
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
+        xlog_grant_sub_space(log, ticket->t_curr_res);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
         */
        if (ticket->t_cnt > 0) {
                ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
-                XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
+                xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
-                XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
        }
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index f40d4391fcfc..4b2ac88dbb83 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -96,7 +96,6 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 /* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_REG_TYPE_BFORMAT           1
 #define XLOG_REG_TYPE_BCHUNK            2
 #define XLOG_REG_TYPE_EFI_FORMAT        3
@@ -117,21 +116,13 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XLOG_REG_TYPE_COMMIT            18
 #define XLOG_REG_TYPE_TRANSHDR          19
 #define XLOG_REG_TYPE_MAX               19
-#endif
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
 typedef struct xfs_log_iovec {
        xfs_caddr_t             i_addr;         /* beginning address of region */
        int             i_len;          /* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
+        uint            i_type;         /* type of region */
-        uint            i_type;         /* type of region */
-#endif
 } xfs_log_iovec_t;
 typedef void* xfs_log_ticket_t;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4518b188ade6..34bcbf50789c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
 /* Ticket reservation region accounting */ 
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_TIC_LEN_MAX        15
 #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
                                (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
 * we don't care about.
 */
 typedef struct xlog_res {
-        uint    r_len;
+        uint    r_len;  /* region length                :4 */
-        uint    r_type;
+        uint    r_type; /* region's transaction type    :4 */
 } xlog_res_t;
-#else
-#define XLOG_TIC_RESET_RES(t)
-#define XLOG_TIC_ADD_OPHDR(t)
-#define XLOG_TIC_ADD_REGION(t, len, type)
-#endif
 typedef struct xlog_ticket {
        sv_t               t_sema;       /* sleep on this semaphore      : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
        char               t_flags;      /* properties of reservation    : 1  */
        uint               t_trans_type; /* transaction type             : 4  */
-#if defined (XFS_LOG_RES_DEBUG)
        /* reservation array fields */
        uint               t_res_num;                    /* num in array : 4 */
-        xlog_res_t         t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : X */ 
        uint               t_res_num_ophdrs;             /* num op hdrs  : 4 */
        uint               t_res_arr_sum;                /* array sum    : 4 */
        uint               t_res_o_flow;                 /* sum overflow : 4 */
-#endif
+        xlog_res_t         t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
 } xlog_ticket_t;
 #endif
@@ -494,71 +485,13 @@ typedef struct log {
 #define XLOG_FORCED_SHUTDOWN(log)       ((log)->l_flags & XLOG_IO_ERROR)
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type)                            \
-    {                                                                   \
-        if (type == 'w') {                                              \
-                (log)->l_grant_write_bytes -= (bytes);                  \
-                if ((log)->l_grant_write_bytes < 0) {                   \
-                        (log)->l_grant_write_bytes += (log)->l_logsize; \
-                        (log)->l_grant_write_cycle--;                   \
-                }                                                       \
-        } else {                                                        \
-                (log)->l_grant_reserve_bytes -= (bytes);                \
-                if ((log)->l_grant_reserve_bytes < 0) {                 \
-                        (log)->l_grant_reserve_bytes += (log)->l_logsize;\
-                        (log)->l_grant_reserve_cycle--;                 \
-                }                                                       \
-         }                                                              \
-    }
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type)                            \
-    {                                                                   \
-        if (type == 'w') {                                              \
-                (log)->l_grant_write_bytes += (bytes);                  \
-                if ((log)->l_grant_write_bytes > (log)->l_logsize) {    \
-                        (log)->l_grant_write_bytes -= (log)->l_logsize; \
-                        (log)->l_grant_write_cycle++;                   \
-                }                                                       \
-        } else {                                                        \
-                (log)->l_grant_reserve_bytes += (bytes);                \
-                if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {  \
-                        (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
-                        (log)->l_grant_reserve_cycle++;                 \
-                }                                                       \
-         }                                                              \
-    }
-#define XLOG_INS_TICKETQ(q, tic)                        \
-    {                                                   \
-        if (q) {                                        \
-                (tic)->t_next       = (q);              \
-                (tic)->t_prev       = (q)->t_prev;      \
-                (q)->t_prev->t_next = (tic);            \
-                (q)->t_prev         = (tic);            \
-        } else {                                        \
-                (tic)->t_prev = (tic)->t_next = (tic);  \
-                (q) = (tic);                            \
-        }                                               \
-        (tic)->t_flags |= XLOG_TIC_IN_Q;                \
-    }
-#define XLOG_DEL_TICKETQ(q, tic)                        \
-    {                                                   \
-        if ((tic) == (tic)->t_next) {                   \
-                (q) = NULL;                             \
-        } else {                                        \
-                (q) = (tic)->t_next;                    \
-                (tic)->t_next->t_prev = (tic)->t_prev;  \
-                (tic)->t_prev->t_next = (tic)->t_next;  \
-        }                                               \
-        (tic)->t_next = (tic)->t_prev = NULL;           \
-        (tic)->t_flags &= ~XLOG_TIC_IN_Q;               \
-    }
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int       xlog_find_tail(xlog_t  *log,
                                xfs_daddr_t *head_blk,
-                                xfs_daddr_t *tail_blk,
+                                xfs_daddr_t *tail_blk);
-                                int readonly);
+extern int       xlog_recover(xlog_t *log);
-extern int       xlog_recover(xlog_t *log, int readonly);
 extern int       xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void      xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern void      xlog_recover_process_iunlinks(xlog_t *log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ab7df768063..7d46cbd6a07a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -783,8 +783,7 @@ int
 xlog_find_tail(
        xlog_t                  *log,
        xfs_daddr_t             *head_blk,
-        xfs_daddr_t             *tail_blk,
+        xfs_daddr_t             *tail_blk)
-        int                     readonly)
 {
        xlog_rec_header_t       *rhead;
        xlog_op_header_t        *op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
        /*
         * The logitem format's flag tells us if this was user quotaoff,
-         * group quotaoff or both.
+         * group/project quotaoff or both.
         */
        if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
                log->l_quotaoffs_flag |= XFS_DQ_USER;
+        if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+                log->l_quotaoffs_flag |= XFS_DQ_PROJ;
        if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
                log->l_quotaoffs_flag |= XFS_DQ_GROUP;
@@ -3890,14 +3891,13 @@ xlog_do_recover(
 */
 int
 xlog_recover(
-        xlog_t          *log,
+        xlog_t          *log)
-        int             readonly)
 {
        xfs_daddr_t     head_blk, tail_blk;
        int             error;
        /* find the tail of the log */
-        if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+        if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
                return error;
        if (tail_blk != head_blk) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 303af86739bf..6088e14f84e3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,7 @@ STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void     xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void     xfs_unmountfs_wait(xfs_mount_t *);
-static struct {
+static const struct {
    short offset;
    short type;     /* 0 = integer
                * 1 = binary / string (no translation)
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
        xfs_iflush_all(mp);
-        XFS_QM_DQPURGEALL(mp,
+        XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
-                XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
        /*
         * Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3432fd5a3986..cd3cf9613a00 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -308,7 +308,6 @@ typedef struct xfs_mount {
        xfs_buftarg_t           *m_ddev_targp;  /* saves taking the address */
        xfs_buftarg_t           *m_logdev_targp;/* ptr to log device */
        xfs_buftarg_t           *m_rtdev_targp; /* ptr to rt device */
-#define m_dev           m_ddev_targp->pbr_dev
        __uint8_t               m_dircook_elog; /* log d-cookie entry bits */
        __uint8_t               m_blkbit_log;   /* blocklog + NBBY */
        __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
                                                   user */
 #define XFS_MOUNT_NOALIGN       (1ULL << 7)     /* turn off stripe alignment
                                                   allocations */
-#define XFS_MOUNT_COMPAT_ATTR   (1ULL << 8)     /* do not use attr2 format */
+#define XFS_MOUNT_ATTR2         (1ULL << 8)     /* allow use of attr2 format */
                             /* (1ULL << 9)     -- currently unused */
 #define XFS_MOUNT_NORECOVERY    (1ULL << 10)    /* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED        (1ULL << 11)    /* shared mount */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4d4e8f4e768e..81a05cfd77d2 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -243,7 +243,6 @@ xfs_rename(
        xfs_inode_t     *inodes[4];
        int             target_ip_dropped = 0;  /* dropped target_ip link? */
        vnode_t         *src_dir_vp;
-        bhv_desc_t      *target_dir_bdp;
        int             spaceres;
        int             target_link_zero = 0;
        int             num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
         * Find the XFS behavior descriptor for the target directory
         * vnode since it was not handed to us.
         */
-        target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
+        target_dp = xfs_vtoi(target_dir_vp);
-                                                &xfs_vnodeops);
+        if (target_dp == NULL) {
-        if (target_dir_bdp == NULL) {
                return XFS_ERROR(EXDEV);
        }
        src_dp = XFS_BHVTOI(src_dir_bdp);
-        target_dp = XFS_BHVTOI(target_dir_bdp);
        mp = src_dp->i_mount;
        if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index c4b20872f07d..a59c102cf214 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -238,6 +238,7 @@ xfs_bioerror_relse(
        }
        return (EIO);
 }
 /*
 * Prints out an ALERT message about I/O error.
 */
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
 "       (\"%s\") error %d buf count %zd",
                (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
-                XFS_BUFTARG_NAME(bp->pb_target),
+                XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
-                (__uint64_t)blkno,
+                (__uint64_t)blkno, func,
-                func,
+                XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
-                XFS_BUF_GETERROR(bp),
-                XFS_BUF_COUNT(bp));
 }
 /*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 4a17d335f897..bf168a91ddb8 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -68,18 +68,6 @@ struct xfs_mount;
        (XFS_SB_VERSION_NUMBITS | \
         XFS_SB_VERSION_OKREALFBITS | \
         XFS_SB_VERSION_OKSASHFBITS)
-#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits)     \
-        (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
-          (morebits)) ? \
-                (XFS_SB_VERSION_4 | \
-                 ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
-                 ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
-                 ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
-                 ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
-                 ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
-                 ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
-                 ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
-                XFS_SB_VERSION_1)
 /*
 * There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
        (XFS_SB_VERSION2_OKREALFBITS |  \
         XFS_SB_VERSION2_OKSASHFBITS )
-/*
- * mkfs macro to set up sb_features2 word
- */
-#define XFS_SB_VERSION2_MKFS(resvd1, sbcntr)    0
 typedef struct xfs_sb
 {
        __uint32_t      sb_magicnum;    /* magic number == XFS_SB_MAGIC */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 279e043d7323..d3d714e6b32a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
        xfs_log_item_t          *lip;
        int                     i;
 #endif
+        xfs_mount_t             *mp = tp->t_mountp;
        /*
         * See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
         * filesystem.  This happens in paths where we detect
         * corruption and decide to give up.
         */
-        if ((tp->t_flags & XFS_TRANS_DIRTY) &&
+        if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
-            !XFS_FORCED_SHUTDOWN(tp->t_mountp))
+                XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
-                xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+                xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+        }
 #ifdef DEBUG
        if (!(flags & XFS_TRANS_ABORT)) {
                licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
                                }
                                lip = lidp->lid_item;
-                                if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+                                if (!XFS_FORCED_SHUTDOWN(mp))
                                        ASSERT(!(lip->li_type == XFS_LI_EFD));
                        }
                        licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
        }
 #endif
        xfs_trans_unreserve_and_mod_sb(tp);
-        XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+        XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
        if (tp->t_ticket) {
                if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
                } else {
                        log_flags = 0;
                }
-                xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+                xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
        }
        /* mark this thread as no longer being in a transaction */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a889963fdd14..d77901c07f63 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -973,7 +973,6 @@ void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
-void            xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void            xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fefe1d60377f..34654ec6ae10 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -55,16 +55,13 @@ xfs_get_dir_entry(
        xfs_inode_t     **ipp)
 {
        vnode_t         *vp;
-        bhv_desc_t      *bdp;
        vp = VNAME_TO_VNODE(dentry);
-        bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
-        if (!bdp) {
+        *ipp = xfs_vtoi(vp);
-                *ipp = NULL;
+        if (!*ipp)
                return XFS_ERROR(ENOENT);
-        }
        VN_HOLD(vp);
-        *ipp = XFS_BHVTOI(bdp);
        return 0;
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7bdbd991ab1c..b6ad370fab3d 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -53,6 +53,7 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_fsops.h"
 STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -290,8 +291,8 @@ xfs_start_flags(
                mp->m_flags |= XFS_MOUNT_IDELETE;
        if (ap->flags & XFSMNT_DIRSYNC)
                mp->m_flags |= XFS_MOUNT_DIRSYNC;
-        if (ap->flags & XFSMNT_COMPAT_ATTR)
+        if (ap->flags & XFSMNT_ATTR2)
-                mp->m_flags |= XFS_MOUNT_COMPAT_ATTR;
+                mp->m_flags |= XFS_MOUNT_ATTR2;
        if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
                mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
                mp->m_flags |= XFS_MOUNT_NOUUID;
        if (ap->flags & XFSMNT_BARRIER)
                mp->m_flags |= XFS_MOUNT_BARRIER;
+        else
+                mp->m_flags &= ~XFS_MOUNT_BARRIER;
        return 0;
 }
@@ -330,10 +333,11 @@ xfs_finish_flags(
        /* Fail a mount where the logbuf is smaller then the log stripe */
        if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
-                if ((ap->logbufsize == -1) &&
+                if ((ap->logbufsize <= 0) &&
                    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
-                } else if (ap->logbufsize < mp->m_sb.sb_logsunit) {
+                } else if (ap->logbufsize > 0 &&
+                           ap->logbufsize < mp->m_sb.sb_logsunit) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size must be greater than or equal to log stripe size");
                        return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
                }
        }
+        if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+                mp->m_flags |= XFS_MOUNT_ATTR2;
+        }
        /*
         * prohibit r/w mounts of read-only filesystems
         */
@@ -382,10 +390,6 @@ xfs_finish_flags(
                        return XFS_ERROR(EINVAL);
        }
-        if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
-                mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
-        }
        return 0;
 }
@@ -504,13 +508,13 @@ xfs_mount(
        if (error)
                goto error2;
+        if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
+                xfs_mountfs_check_barriers(mp);
        error = XFS_IOINIT(vfsp, args, flags);
        if (error)
                goto error2;
-        if ((args->flags & XFSMNT_BARRIER) &&
-            !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
-                xfs_mountfs_check_barriers(mp);
        return 0;
 error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
        else
                mp->m_flags &= ~XFS_MOUNT_NOATIME;
+        if (args->flags & XFSMNT_BARRIER)
+                mp->m_flags |= XFS_MOUNT_BARRIER;
+        else
+                mp->m_flags &= ~XFS_MOUNT_BARRIER;
        if ((vfsp->vfs_flag & VFS_RDONLY) &&
            !(*flags & MS_RDONLY)) {
                vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
 #define MNTOPT_BARRIER  "barrier"       /* use writer barriers for log write and
                                         * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"    /* .. disable */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_64BITINODE   "inode64"   /* inodes can be allocated anywhere */
 #define MNTOPT_IKEEP    "ikeep"         /* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
        int                     iosize;
        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-        args->flags |= XFSMNT_COMPAT_ATTR;
 #if 0   /* XXX: off by default, until some remaining issues ironed out */
        args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
                        args->flags |= XFSMNT_NOUUID;
                } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
                        args->flags |= XFSMNT_BARRIER;
+                } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                        args->flags &= ~XFSMNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
                        args->flags &= ~XFSMNT_IDELETE;
                } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
                } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
                        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                        args->flags &= ~XFSMNT_COMPAT_ATTR;
+                        args->flags |= XFSMNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                        args->flags |= XFSMNT_COMPAT_ATTR;
+                        args->flags &= ~XFSMNT_ATTR2;
                } else if (!strcmp(this_char, "osyncisdsync")) {
                        /* no-op, this is now the default */
 printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
                { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
                { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
                { XFS_MOUNT_OSYNCISOSYNC,       "," MNTOPT_OSYNCISOSYNC },
-                { XFS_MOUNT_BARRIER,            "," MNTOPT_BARRIER },
                { XFS_MOUNT_IDELETE,            "," MNTOPT_NOIKEEP },
                { 0, NULL }
        };
@@ -1914,33 +1924,28 @@ xfs_showargs(
        if (mp->m_logbufs > 0)
                seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
        if (mp->m_logbsize > 0)
                seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
        if (mp->m_logname)
                seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
        if (mp->m_rtname)
                seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
        if (mp->m_dalign > 0)
                seq_printf(m, "," MNTOPT_SUNIT "=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
        if (mp->m_swidth > 0)
                seq_printf(m, "," MNTOPT_SWIDTH "=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-        if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
-                seq_printf(m, "," MNTOPT_ATTR2);
        if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
                seq_printf(m, "," MNTOPT_LARGEIO);
+        if (mp->m_flags & XFS_MOUNT_BARRIER)
+                seq_printf(m, "," MNTOPT_BARRIER);
        if (!(vfsp->vfs_flag & VFS_32BITINODES))
                seq_printf(m, "," MNTOPT_64BITINODE);
        if (vfsp->vfs_flag & VFS_GRPID)
                seq_printf(m, "," MNTOPT_GRPID);
@@ -1959,6 +1964,7 @@ xfs_freeze(
        /* Push the superblock and write an unmount record */
        xfs_log_unmount_write(mp);
        xfs_unmountfs_writesb(mp);
+        xfs_fs_log_dummy(mp);
 }
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e92cacde02f5..8076cc981e11 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -185,8 +185,7 @@ xfs_getattr(
                break;
        }
-        vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
+        vn_atime_to_timespec(vp, &vap->va_atime);
-        vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
        vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
        vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
        vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -544,24 +543,6 @@ xfs_setattr(
                }
                /*
-                 * Can't set extent size unless the file is marked, or
-                 * about to be marked as a realtime file.
-                 *
-                 * This check will be removed when fixed size extents
-                 * with buffered data writes is implemented.
-                 *
-                 */
-                if ((mask & XFS_AT_EXTSIZE)                     &&
-                    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                     vap->va_extsize) &&
-                    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
-                       ((mask & XFS_AT_XFLAGS) &&
-                        (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
-                        code = XFS_ERROR(EINVAL);
-                        goto error_return;
-                }
-                /*
                 * Can't change realtime flag if any extents are allocated.
                 */
                if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -823,13 +804,17 @@ xfs_setattr(
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
                                if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
                                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-                        } else {
+                                if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
+                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                        } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
                                if (vap->va_xflags & XFS_XFLAG_REALTIME) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                } else {
                                        ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
                                }
+                                if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
+                                        di_flags |= XFS_DIFLAG_EXTSIZE;
                        }
                        ip->i_d.di_flags = di_flags;
                }
@@ -999,10 +984,6 @@ xfs_readlink(
                goto error_return;
        }
-        if (!(ioflags & IO_INVIS)) {
-                xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
-        }
        /*
         * See if the symlink is stored inline.
         */
@@ -1234,7 +1215,8 @@ xfs_inactive_free_eofblocks(
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        if (!error && (nimaps != 0) &&
-            (imap.br_startblock != HOLESTARTBLOCK)) {
+            (imap.br_startblock != HOLESTARTBLOCK ||
+             ip->i_delayed_blks)) {
                /*
                 * Attach the dquots to the inode up front.
                 */
@@ -1569,9 +1551,11 @@ xfs_release(
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+                       ip->i_delayed_blks > 0)) &&
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-                    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
+                    (!(ip->i_d.di_flags &
+                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
                                return (error);
                        /* Update linux inode block count after free above */
@@ -1628,7 +1612,8 @@ xfs_inactive(
         * only one with a reference to the inode.
         */
        truncate = ((ip->i_d.di_nlink == 0) &&
-            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
+            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
+             (ip->i_delayed_blks > 0)) &&
            ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
        mp = ip->i_mount;
@@ -1646,10 +1631,12 @@ xfs_inactive(
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
-                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
+                       ip->i_delayed_blks > 0)) &&
-                    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
+                      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
-                     (ip->i_delayed_blks != 0))) {
+                     (!(ip->i_d.di_flags &
+                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+                      (ip->i_delayed_blks != 0)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
                                return (VN_INACTIVE_CACHE);
                        /* Update linux inode block count after free above */
@@ -2593,7 +2580,6 @@ xfs_link(
        int                     cancel_flags;
        int                     committed;
        vnode_t                 *target_dir_vp;
-        bhv_desc_t              *src_bdp;
        int                     resblks;
        char                    *target_name = VNAME(dentry);
        int                     target_namelen;
@@ -2606,8 +2592,7 @@ xfs_link(
        if (VN_ISDIR(src_vp))
                return XFS_ERROR(EPERM);
-        src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
+        sip = xfs_vtoi(src_vp);
-        sip = XFS_BHVTOI(src_bdp);
        tdp = XFS_BHVTOI(target_dir_bdp);
        mp = tdp->i_mount;
        if (XFS_FORCED_SHUTDOWN(mp))
@@ -3240,7 +3225,6 @@ xfs_readdir(
        xfs_trans_t     *tp = NULL;
        int             error = 0;
        uint            lock_mode;
-        xfs_off_t       start_offset;
        vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
                                               (inst_t *)__return_address);
@@ -3251,11 +3235,7 @@ xfs_readdir(
        }
        lock_mode = xfs_ilock_map_shared(dp);
-        start_offset = uiop->uio_offset;
        error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
-        if (start_offset != uiop->uio_offset) {
-                xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
-        }
        xfs_iunlock_map_shared(dp, lock_mode);
        return error;
 }
@@ -3832,7 +3812,12 @@ xfs_reclaim(
        vn_iowait(vp);
        ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-        ASSERT(VN_CACHED(vp) == 0);
+        /*
+         * Make sure the atime in the XFS inode is correct before freeing the
+         * Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        /* If we have nothing to flush with this inode then complete the
         * teardown now, otherwise break the link between the xfs inode
@@ -4002,42 +3987,36 @@ xfs_alloc_file_space(
        int                     alloc_type,
        int                     attr_flags)
 {
+        xfs_mount_t             *mp = ip->i_mount;
+        xfs_off_t               count;
        xfs_filblks_t           allocated_fsb;
        xfs_filblks_t           allocatesize_fsb;
-        int                     committed;
+        xfs_extlen_t            extsz, temp;
-        xfs_off_t               count;
+        xfs_fileoff_t           startoffset_fsb;
-        xfs_filblks_t           datablocks;
-        int                     error;
        xfs_fsblock_t           firstfsb;
-        xfs_bmap_free_t         free_list;
+        int                     nimaps;
-        xfs_bmbt_irec_t         *imapp;
+        int                     bmapi_flag;
-        xfs_bmbt_irec_t         imaps[1];
+        int                     quota_flag;
-        xfs_mount_t             *mp;
-        int                     numrtextents;
-        int                     reccount;
-        uint                    resblks;
        int                     rt;
-        int                     rtextsize;
-        xfs_fileoff_t           startoffset_fsb;
        xfs_trans_t             *tp;
-        int                     xfs_bmapi_flags;
+        xfs_bmbt_irec_t         imaps[1], *imapp;
+        xfs_bmap_free_t         free_list;
+        uint                    qblocks, resblks, resrtextents;
+        int                     committed;
+        int                     error;
        vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
-        mp = ip->i_mount;
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        /*
+        rt = XFS_IS_REALTIME_INODE(ip);
-         * determine if this is a realtime file
+        if (unlikely(rt)) {
-         */
+                if (!(extsz = ip->i_d.di_extsize))
-        if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
+                        extsz = mp->m_sb.sb_rextsize;
-                if (ip->i_d.di_extsize)
+        } else {
-                        rtextsize = ip->i_d.di_extsize;
+                extsz = ip->i_d.di_extsize;
-                else
+        }
-                        rtextsize = mp->m_sb.sb_rextsize;
-        } else
-                rtextsize = 0;
        if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
                return error;
@@ -4048,8 +4027,8 @@ xfs_alloc_file_space(
        count = len;
        error = 0;
        imapp = &imaps[0];
-        reccount = 1;
+        nimaps = 1;
-        xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+        bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -4070,43 +4049,51 @@ xfs_alloc_file_space(
        }
        /*
-         * allocate file space until done or until there is an error
+         * Allocate file space until done or until there is an error
         */
 retry:
        while (allocatesize_fsb && !error) {
+                xfs_fileoff_t   s, e;
                /*
-                 * determine if reserving space on
+                 * Determine space reservations for data/realtime.
-                 * the data or realtime partition.
                 */
-                if (rt) {
+                if (unlikely(extsz)) {
-                        xfs_fileoff_t s, e;
                        s = startoffset_fsb;
-                        do_div(s, rtextsize);
+                        do_div(s, extsz);
-                        s *= rtextsize;
+                        s *= extsz;
-                        e = roundup_64(startoffset_fsb + allocatesize_fsb,
+                        e = startoffset_fsb + allocatesize_fsb;
-                                rtextsize);
+                        if ((temp = do_mod(startoffset_fsb, extsz)))
-                        numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
+                                e += temp;
-                        datablocks = 0;
+                        if ((temp = do_mod(e, extsz)))
+                                e += extsz - temp;
+                } else {
+                        s = 0;
+                        e = allocatesize_fsb;
+                }
+                if (unlikely(rt)) {
+                        resrtextents = qblocks = (uint)(e - s);
+                        resrtextents /= mp->m_sb.sb_rextsize;
+                        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                        quota_flag = XFS_QMOPT_RES_RTBLKS;
                } else {
-                        datablocks = allocatesize_fsb;
+                        resrtextents = 0;
-                        numrtextents = 0;
+                        resblks = qblocks = \
+                                XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+                        quota_flag = XFS_QMOPT_RES_REGBLKS;
                }
                /*
-                 * allocate and setup the transaction
+                 * Allocate and setup the transaction.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-                resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
+                error = xfs_trans_reserve(tp, resblks,
-                error = xfs_trans_reserve(tp,
+                                          XFS_WRITE_LOG_RES(mp), resrtextents,
-                                          resblks,
-                                          XFS_WRITE_LOG_RES(mp),
-                                          numrtextents,
                                          XFS_TRANS_PERM_LOG_RES,
                                          XFS_WRITE_LOG_COUNT);
                /*
-                 * check for running out of space
+                 * Check for running out of space
                 */
                if (error) {
                        /*
@@ -4117,8 +4104,8 @@ retry:
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+                error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-                                ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+                                                      qblocks, 0, quota_flag);
                if (error)
                        goto error1;
@@ -4126,19 +4113,19 @@ retry:
                xfs_trans_ihold(tp, ip);
                /*
-                 * issue the bmapi() call to allocate the blocks
+                 * Issue the xfs_bmapi() call to allocate the blocks
                 */
                XFS_BMAP_INIT(&free_list, &firstfsb);
                error = xfs_bmapi(tp, ip, startoffset_fsb,
-                                  allocatesize_fsb, xfs_bmapi_flags,
+                                  allocatesize_fsb, bmapi_flag,
-                                  &firstfsb, 0, imapp, &reccount,
+                                  &firstfsb, 0, imapp, &nimaps,
                                  &free_list);
                if (error) {
                        goto error0;
                }
                /*
-                 * complete the transaction
+                 * Complete the transaction
                 */
                error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
                if (error) {
@@ -4153,7 +4140,7 @@ retry:
                allocated_fsb = imapp->br_blockcount;
-                if (reccount == 0) {
+                if (nimaps == 0) {
                        error = XFS_ERROR(ENOSPC);
                        break;
                }
@@ -4176,9 +4163,11 @@ dmapi_enospc_check:
        return error;
- error0:
+error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
        xfs_bmap_cancel(&free_list);
- error1:
+        XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+error1: /* Just cancel transaction */
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        goto dmapi_enospc_check;
@@ -4423,8 +4412,8 @@ xfs_free_file_space(
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-                                ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
+                                ip->i_udquot, ip->i_gdquot, resblks, 0,
-                                XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+                                XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto error1;
diff --git a/mm/swap.c b/mm/swap.c
index ee6d71ccfa56..cbb48e721ab9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -384,6 +384,8 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
        return pagevec_count(pvec);
 }
+EXPORT_SYMBOL(pagevec_lookup);
 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t *index, int tag, unsigned nr_pages)
 {