30 files changed, 2466 insertions, 1621 deletions
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 351a8f454bd1..4dfc7c370819 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -32,23 +32,15 @@ typedef struct sv_s {
        wait_queue_head_t waiters;
 } sv_t;
-#define SV_FIFO         0x0             /* sv_t is FIFO type */
+/*
+ * Sleep on @sv until woken.  The current task is queued as an exclusive
+ * waiter and marked TASK_UNINTERRUPTIBLE, then @lock is released with
+ * spin_unlock() (so it is presumably held on entry) and the task calls
+ * schedule().  @lock is not re-taken before returning.
+ */
+static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-#define SV_LIFO         0x2             /* sv_t is LIFO type */
-#define SV_PRIO         0x4             /* sv_t is PRIO type */
-#define SV_KEYED        0x6             /* sv_t is KEYED type */
-#define SV_DEFAULT      SV_FIFO
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
-                             unsigned long timeout)
 {
        DECLARE_WAITQUEUE(wait, current);
        add_wait_queue_exclusive(&sv->waiters, &wait);
-        __set_current_state(state);
+        __set_current_state(TASK_UNINTERRUPTIBLE);
        spin_unlock(lock);
-        schedule_timeout(timeout);
+        schedule();
        remove_wait_queue(&sv->waiters, &wait);
 }
@@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
 #define sv_destroy(sv) \
        /*NOTHING*/
 #define sv_wait(sv, pri, lock, s) \
-        _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
+        _sv_wait(sv, lock)
-#define sv_wait_sig(sv, pri, lock, s)   \
-        _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
-#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
-        _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
-#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
-        _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
 #define sv_signal(sv) \
        wake_up(&(sv)->waiters)
 #define sv_broadcast(sv) \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b5..de3a198f771e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -42,6 +42,40 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC          37
+/*
+ * Map an address to one of the NVSYNC shared wait queue buckets.  The
+ * argument is parenthesised so that an expression argument is cast as a
+ * whole rather than only its first operand.
+ */
+#define to_ioend_wq(v)  (&xfs_ioend_wq[((unsigned long)(v)) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+/*
+ * Initialise every bucket of the xfs_ioend_wq wait queue table.
+ */
+void __init
+xfs_ioend_init(void)
+{
+        int i;
+        for (i = 0; i < NVSYNC; i++)
+                init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+/*
+ * Sleep until ip->i_iocount reads zero.  The wait queue is the bucket
+ * selected by hashing the inode address with to_ioend_wq().
+ */
+void
+xfs_ioend_wait(
+        xfs_inode_t     *ip)
+{
+        wait_queue_head_t *wq = to_ioend_wq(ip);
+        wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+/*
+ * Decrement ip->i_iocount and, if it reached zero, wake the waiters
+ * sleeping on the wait queue bucket that the inode address hashes to.
+ */
+STATIC void
+xfs_ioend_wake(
+        xfs_inode_t     *ip)
+{
+        if (atomic_dec_and_test(&ip->i_iocount))
+                wake_up(to_ioend_wq(ip));
+}
 STATIC void
 xfs_count_page_state(
        struct page             *page,
@@ -146,16 +180,25 @@ xfs_destroy_ioend(
        xfs_ioend_t             *ioend)
 {
        struct buffer_head      *bh, *next;
+        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
        for (bh = ioend->io_buffer_head; bh; bh = next) {
                next = bh->b_private;
                bh->b_end_io(bh, !ioend->io_error);
        }
-        if (unlikely(ioend->io_error)) {
-                vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error,
+        /*
-                                __FILE__,__LINE__);
+         * Volume managers supporting multiple paths can send back ENODEV
+         * when the final path disappears.  In this case continuing to fill
+         * the page cache with dirty data which cannot be written out is
+         * evil, so prevent that.
+         */
+        if (unlikely(ioend->io_error == -ENODEV)) {
+                xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+                                      __FILE__, __LINE__);
        }
-        vn_iowake(XFS_I(ioend->io_inode));
+        xfs_ioend_wake(ip);
        mempool_free(ioend, xfs_ioend_pool);
 }
@@ -191,7 +234,7 @@ xfs_setfilesize(
                ip->i_d.di_size = isize;
                ip->i_update_core = 1;
                ip->i_update_size = 1;
-                mark_inode_dirty_sync(ioend->io_inode);
+                xfs_mark_inode_dirty_sync(ip);
        }
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -317,14 +360,9 @@ xfs_map_blocks(
        xfs_iomap_t             *mapp,
        int                     flags)
 {
-        xfs_inode_t             *ip = XFS_I(inode);
+        int                     nmaps = 1;
-        int                     error, nmaps = 1;
+        return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
-        error = xfs_iomap(ip, offset, count,
-                                flags, mapp, &nmaps);
-        if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
-                xfs_iflags_set(ip, XFS_IMODIFIED);
-        return -error;
 }
 STATIC_INLINE int
@@ -512,7 +550,7 @@ xfs_cancel_ioend(
                        unlock_buffer(bh);
                } while ((bh = next_bh) != NULL);
-                vn_iowake(XFS_I(ioend->io_inode));
+                xfs_ioend_wake(XFS_I(ioend->io_inode));
                mempool_free(ioend, xfs_ioend_pool);
        } while ((ioend = next) != NULL);
 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 3ba0631a3818..7b26f5ff9692 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -43,4 +43,7 @@ typedef struct xfs_ioend {
 extern const struct address_space_operations xfs_address_space_operations;
 extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 36d5fcd3f593..cb329edc925b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -630,6 +630,29 @@ xfs_buf_get_flags(
        return NULL;
 }
+/*
+ * Issue a read on @bp.  The write, async, delwri, read-ahead and run-queues
+ * bits are cleared from bp->b_flags, then the read, async, read-ahead and
+ * run-queues bits present in @flags are applied, and the buffer is handed to
+ * xfs_buf_iorequest().  Unless XBF_ASYNC was requested in @flags, a
+ * successful submission is followed by xfs_buf_iowait().
+ *
+ * Returns the status of the submission, or of the wait when one was done.
+ */
+STATIC int
+_xfs_buf_read(
+        xfs_buf_t               *bp,
+        xfs_buf_flags_t         flags)
+{
+        int                     status;
+        XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
+        ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+        ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+        bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+        bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+        status = xfs_buf_iorequest(bp);
+        if (!status && !(flags & XBF_ASYNC))
+                status = xfs_buf_iowait(bp);
+        return status;
+}
 xfs_buf_t *
 xfs_buf_read_flags(
        xfs_buftarg_t           *target,
@@ -646,7 +669,7 @@ xfs_buf_read_flags(
                if (!XFS_BUF_ISDONE(bp)) {
                        XB_TRACE(bp, "read", (unsigned long)flags);
                        XFS_STATS_INC(xb_get_read);
-                        xfs_buf_iostart(bp, flags);
+                        _xfs_buf_read(bp, flags);
                } else if (flags & XBF_ASYNC) {
                        XB_TRACE(bp, "read_async", (unsigned long)flags);
                        /*
@@ -1048,50 +1071,39 @@ xfs_buf_ioerror(
        XB_TRACE(bp, "ioerror", (unsigned long)error);
 }
-/*
- *      Initiate I/O on a buffer, based on the flags supplied.
- *      The b_iodone routine in the buffer supplied will only be called
- *      when all of the subsidiary I/O requests, if any, have been completed.
- */
+/*
+ * Start an asynchronous write of @bp.  The buffer is taken off the delayed
+ * write queue, its read, delwri and read-ahead flags are cleared, and it is
+ * marked XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES.  @mp is recorded in
+ * bp->b_mount and the buffer is passed to xfs_bdstrat_cb(), whose return
+ * value is returned to the caller.
+ */
 int
-xfs_buf_iostart(
+xfs_bawrite(
-        xfs_buf_t               *bp,
+        void                    *mp,
-        xfs_buf_flags_t         flags)
+        struct xfs_buf          *bp)
 {
-        int                     status = 0;
+        XB_TRACE(bp, "bawrite", 0);
-        XB_TRACE(bp, "iostart", (unsigned long)flags);
+        ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-        if (flags & XBF_DELWRI) {
+        xfs_buf_delwri_dequeue(bp);
-                bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
-                bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
-                xfs_buf_delwri_queue(bp, 1);
-                return 0;
-        }
-        bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+        bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+        bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
-        bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
-                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+        bp->b_mount = mp;
+        bp->b_strat = xfs_bdstrat_cb;
+        return xfs_bdstrat_cb(bp);
+}
-        BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
+/*
+ * Queue @bp as a delayed write.  The strategy routine is set to
+ * xfs_bdstrat_cb, @mp is recorded in bp->b_mount, XBF_READ is cleared and
+ * XBF_DELWRI | XBF_ASYNC are set before the buffer is handed to
+ * xfs_buf_delwri_queue().
+ */
+void
+xfs_bdwrite(
+        void                    *mp,
+        struct xfs_buf          *bp)
+{
+        XB_TRACE(bp, "bdwrite", 0);
-        /* For writes allow an alternate strategy routine to precede
+        bp->b_strat = xfs_bdstrat_cb;
-         * the actual I/O request (which may not be issued at all in
+        bp->b_mount = mp;
-         * a shutdown situation, for example).
-         */
-        status = (flags & XBF_WRITE) ?
-                xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
-        /* Wait for I/O if we are not an async request.
+        bp->b_flags &= ~XBF_READ;
-         * Note: async I/O request completion will release the buffer,
+        bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-         * and that can already be done by this point.  So using the
-         * buffer pointer from here on, after async I/O, is invalid.
-         */
-        if (!status && !(flags & XBF_ASYNC))
-                status = xfs_buf_iowait(bp);
-        return status;
+        xfs_buf_delwri_queue(bp, 1);
 }
 STATIC_INLINE void
@@ -1114,8 +1126,7 @@ xfs_buf_bio_end_io(
        unsigned int            blocksize = bp->b_target->bt_bsize;
        struct bio_vec          *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+        xfs_buf_ioerror(bp, -error);
-                bp->b_error = EIO;
        do {
                struct page     *page = bvec->bv_page;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 456519a088c7..288ae7c4c800 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -168,7 +168,7 @@ typedef struct xfs_buf {
        struct completion       b_iowait;       /* queue for I/O waiters */
        void                    *b_fspriv;
        void                    *b_fspriv2;
-        void                    *b_fspriv3;
+        struct xfs_mount        *b_mount;
        unsigned short          b_error;        /* error code on I/O */
        unsigned int            b_page_count;   /* size of page array */
        unsigned int            b_offset;       /* page offset in first page */
@@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *);
 extern void xfs_buf_unlock(xfs_buf_t *);
 /* Buffer Read and Write Routines */
+extern int xfs_bawrite(void *mp, xfs_buf_t *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
 extern void xfs_buf_ioend(xfs_buf_t *,  int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
 extern int xfs_buf_iorequest(xfs_buf_t *);
 extern int xfs_buf_iowait(xfs_buf_t *);
 extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
@@ -311,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define XFS_BUF_UNORDERED(bp)   ((bp)->b_flags &= ~XBF_ORDERED)
 #define XFS_BUF_ISORDERED(bp)   ((bp)->b_flags & XBF_ORDERED)
-#define XFS_BUF_SHUT(bp)        do { } while (0)
-#define XFS_BUF_UNSHUT(bp)      do { } while (0)
-#define XFS_BUF_ISSHUT(bp)      (0)
 #define XFS_BUF_HOLD(bp)        xfs_buf_hold(bp)
 #define XFS_BUF_READ(bp)        ((bp)->b_flags |= XBF_READ)
 #define XFS_BUF_UNREAD(bp)      ((bp)->b_flags &= ~XBF_READ)
@@ -334,8 +331,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define XFS_BUF_SET_FSPRIVATE(bp, val)          ((bp)->b_fspriv = (void*)(val))
 #define XFS_BUF_FSPRIVATE2(bp, type)            ((type)(bp)->b_fspriv2)
 #define XFS_BUF_SET_FSPRIVATE2(bp, val)         ((bp)->b_fspriv2 = (void*)(val))
-#define XFS_BUF_FSPRIVATE3(bp, type)            ((type)(bp)->b_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(bp, val)         ((bp)->b_fspriv3 = (void*)(val))
 #define XFS_BUF_SET_START(bp)                   do { } while (0)
 #define XFS_BUF_SET_BRELSE_FUNC(bp, func)       ((bp)->b_relse = (func))
@@ -366,14 +361,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define XFS_BUF_TARGET(bp)              ((bp)->b_target)
 #define XFS_BUFTARG_NAME(target)        xfs_buf_target_name(target)
-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
-{
-        bp->b_fspriv3 = mp;
-        bp->b_strat = xfs_bdstrat_cb;
-        xfs_buf_delwri_dequeue(bp);
-        return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
-}
 static inline void xfs_buf_relse(xfs_buf_t *bp)
 {
        if (!bp->b_relse)
@@ -414,17 +401,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
        return error;
 }
-/*
- * No error can be returned from xfs_buf_iostart for delwri
- * buffers as they are queued and no I/O is issued.
- */
-static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
-{
-        bp->b_strat = xfs_bdstrat_cb;
-        bp->b_fspriv3 = mp;
-        (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
-}
 #define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
 #define xfs_iowait(bp)  xfs_buf_iowait(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 8c022cd0ad67..55bddf3b6091 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -25,12 +25,4 @@
 */
 typedef const struct cred cred_t;
-extern cred_t *sys_cred;
-/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
-static inline int capable_cred(cred_t *cr, int cid)
-{
-        return (cr == sys_cred) ? 1 : capable(cid);
-}
 #endif  /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 7f7abec25e14..595751f78350 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,7 +29,6 @@
 #include "xfs_vnodeops.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
-#include "xfs_vfsops.h"
 /*
 * Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3fee790f138b..e14c4e3aea0c 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -36,89 +36,54 @@
 #include "xfs_inode.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_ioctl32.h"
 #include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
 #include <linux/dcache.h>
 #include <linux/smp_lock.h>
 static struct vm_operations_struct xfs_file_vm_ops;
+/*
+ * AIO read entry point.  Builds the ioflags passed to xfs_read(): always
+ * IO_ISAIO, plus IO_ISDIRECT when the file has O_DIRECT set and IO_INVIS
+ * when the file mode carries FMODE_NOCMTIME.
+ */
-STATIC_INLINE ssize_t
+STATIC ssize_t
-__xfs_file_read(
+xfs_file_aio_read(
        struct kiocb            *iocb,
        const struct iovec      *iov,
        unsigned long           nr_segs,
-        int                     ioflags,
        loff_t                  pos)
 {
        struct file             *file = iocb->ki_filp;
+        int                     ioflags = IO_ISAIO;
        BUG_ON(iocb->ki_pos != pos);
        if (unlikely(file->f_flags & O_DIRECT))
                ioflags |= IO_ISDIRECT;
+        if (file->f_mode & FMODE_NOCMTIME)
+                ioflags |= IO_INVIS;
        return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
                                nr_segs, &iocb->ki_pos, ioflags);
 }
+/*
+ * AIO write entry point.  Builds the ioflags passed to xfs_write(): always
+ * IO_ISAIO, plus IO_ISDIRECT when the file has O_DIRECT set and IO_INVIS
+ * when the file mode carries FMODE_NOCMTIME.
+ */
 STATIC ssize_t
-xfs_file_aio_read(
+xfs_file_aio_write(
-        struct kiocb            *iocb,
-        const struct iovec      *iov,
-        unsigned long           nr_segs,
-        loff_t                  pos)
-{
-        return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-STATIC ssize_t
-xfs_file_aio_read_invis(
-        struct kiocb            *iocb,
-        const struct iovec      *iov,
-        unsigned long           nr_segs,
-        loff_t                  pos)
-{
-        return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-STATIC_INLINE ssize_t
-__xfs_file_write(
        struct kiocb            *iocb,
        const struct iovec      *iov,
        unsigned long           nr_segs,
-        int                     ioflags,
        loff_t                  pos)
 {
-        struct file     *file = iocb->ki_filp;
+        struct file             *file = iocb->ki_filp;
+        int                     ioflags = IO_ISAIO;
        BUG_ON(iocb->ki_pos != pos);
        if (unlikely(file->f_flags & O_DIRECT))
                ioflags |= IO_ISDIRECT;
+        if (file->f_mode & FMODE_NOCMTIME)
+                ioflags |= IO_INVIS;
        return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
                                &iocb->ki_pos, ioflags);
 }
 STATIC ssize_t
-xfs_file_aio_write(
-        struct kiocb            *iocb,
-        const struct iovec      *iov,
-        unsigned long           nr_segs,
-        loff_t                  pos)
-{
-        return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-STATIC ssize_t
-xfs_file_aio_write_invis(
-        struct kiocb            *iocb,
-        const struct iovec      *iov,
-        unsigned long           nr_segs,
-        loff_t                  pos)
-{
-        return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-STATIC ssize_t
 xfs_file_splice_read(
        struct file             *infilp,
        loff_t                  *ppos,
@@ -126,20 +91,13 @@ xfs_file_splice_read(
        size_t                  len,
        unsigned int            flags)
 {
-        return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
+        int                     ioflags = 0;
-                                   infilp, ppos, pipe, len, flags, 0);
-}
+        if (infilp->f_mode & FMODE_NOCMTIME)
+                ioflags |= IO_INVIS;
-STATIC ssize_t
-xfs_file_splice_read_invis(
-        struct file             *infilp,
-        loff_t                  *ppos,
-        struct pipe_inode_info  *pipe,
-        size_t                  len,
-        unsigned int            flags)
-{
        return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
-                                   infilp, ppos, pipe, len, flags, IO_INVIS);
+                                   infilp, ppos, pipe, len, flags, ioflags);
 }
 STATIC ssize_t
@@ -150,30 +108,49 @@ xfs_file_splice_write(
        size_t                  len,
        unsigned int            flags)
 {
-        return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
+        int                     ioflags = 0;
-                                    pipe, outfilp, ppos, len, flags, 0);
-}
+        if (outfilp->f_mode & FMODE_NOCMTIME)
+                ioflags |= IO_INVIS;
-STATIC ssize_t
-xfs_file_splice_write_invis(
-        struct pipe_inode_info  *pipe,
-        struct file             *outfilp,
-        loff_t                  *ppos,
-        size_t                  len,
-        unsigned int            flags)
-{
        return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
-                                    pipe, outfilp, ppos, len, flags, IO_INVIS);
+                                    pipe, outfilp, ppos, len, flags, ioflags);
 }
+/*
+ * Open method.  Returns -EFBIG when the inode size exceeds MAX_NON_LFS and
+ * the file was not opened with O_LARGEFILE, -EIO when the filesystem has
+ * been forcibly shut down, and 0 otherwise.
+ */
 STATIC int
 xfs_file_open(
        struct inode    *inode,
-        struct file     *filp)
+        struct file     *file)
 {
-        if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+        if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
                return -EFBIG;
-        return -xfs_open(XFS_I(inode));
+        if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+                return -EIO;
+        return 0;
+}
+/*
+ * Open method for directories.  Runs the xfs_file_open() checks first and
+ * returns their error if any; otherwise, with the inode locked via
+ * xfs_ilock_map_shared(), issues a read-ahead on the data fork when the
+ * directory has at least one extent.  Always returns 0 past the initial
+ * checks; the read-ahead result is not examined.
+ */
+STATIC int
+xfs_dir_open(
+        struct inode    *inode,
+        struct file     *file)
+{
+        struct xfs_inode *ip = XFS_I(inode);
+        int             mode;
+        int             error;
+        error = xfs_file_open(inode, file);
+        if (error)
+                return error;
+        /*
+         * If there are any blocks, read-ahead block 0 as we're almost
+         * certain to have the next operation be a read there.
+         */
+        mode = xfs_ilock_map_shared(ip);
+        if (ip->i_d.di_nextents > 0)
+                xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+        xfs_iunlock(ip, mode);
+        return 0;
 }
 STATIC int
@@ -227,7 +204,7 @@ xfs_file_readdir(
         * point we can change the ->readdir prototype to include the
         * buffer size.
         */
-        bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
+        bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
        error = xfs_readdir(ip, dirent, bufsize,
                                (xfs_off_t *)&filp->f_pos, filldir);
@@ -248,48 +225,6 @@ xfs_file_mmap(
        return 0;
 }
-STATIC long
-xfs_file_ioctl(
-        struct file     *filp,
-        unsigned int    cmd,
-        unsigned long   p)
-{
-        int             error;
-        struct inode    *inode = filp->f_path.dentry->d_inode;
-        error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
-        xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-        /* NOTE:  some of the ioctl's return positive #'s as a
-         *        byte count indicating success, such as
-         *        readlink_by_handle.  So we don't "sign flip"
-         *        like most other routines.  This means true
-         *        errors need to be returned as a negative value.
-         */
-        return error;
-}
-STATIC long
-xfs_file_ioctl_invis(
-        struct file     *filp,
-        unsigned int    cmd,
-        unsigned long   p)
-{
-        int             error;
-        struct inode    *inode = filp->f_path.dentry->d_inode;
-        error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
-        xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-        /* NOTE:  some of the ioctl's return positive #'s as a
-         *        byte count indicating success, such as
-         *        readlink_by_handle.  So we don't "sign flip"
-         *        like most other routines.  This means true
-         *        errors need to be returned as a negative value.
-         */
-        return error;
-}
 /*
 * mmap()d file has taken write protection fault and is being made
 * writable. We can set the page state up correctly for a writable
@@ -325,26 +260,8 @@ const struct file_operations xfs_file_operations = {
 #endif
 };
-const struct file_operations xfs_invis_file_operations = {
-        .llseek         = generic_file_llseek,
-        .read           = do_sync_read,
-        .write          = do_sync_write,
-        .aio_read       = xfs_file_aio_read_invis,
-        .aio_write      = xfs_file_aio_write_invis,
-        .splice_read    = xfs_file_splice_read_invis,
-        .splice_write   = xfs_file_splice_write_invis,
-        .unlocked_ioctl = xfs_file_ioctl_invis,
-#ifdef CONFIG_COMPAT
-        .compat_ioctl   = xfs_file_compat_invis_ioctl,
-#endif
-        .mmap           = xfs_file_mmap,
-        .open           = xfs_file_open,
-        .release        = xfs_file_release,
-        .fsync          = xfs_file_fsync,
-};
 const struct file_operations xfs_dir_file_operations = {
+        .open           = xfs_dir_open,
        .read           = generic_read_dir,
        .readdir        = xfs_file_readdir,
        .llseek         = generic_file_llseek,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 36caa6d957df..5aeb77776961 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -24,6 +24,10 @@ int  fs_noerr(void) { return 0; }
 int  fs_nosys(void) { return ENOSYS; }
 void fs_noval(void) { return; }
+/*
+ * Note: all filemap functions return negative error codes. These
+ * must be negated before returning to the xfs core functions.
+ */
 void
 xfs_tosspages(
        xfs_inode_t     *ip,
@@ -53,7 +57,7 @@ xfs_flushinval_pages(
                if (!ret)
                        truncate_inode_pages(mapping, first);
        }
-        return ret;
+        return -ret;
 }
 int
@@ -72,10 +76,23 @@ xfs_flush_pages(
                xfs_iflags_clear(ip, XFS_ITRUNCATED);
                ret = filemap_fdatawrite(mapping);
                if (flags & XFS_B_ASYNC)
-                        return ret;
+                        return -ret;
                ret2 = filemap_fdatawait(mapping);
                if (!ret)
                        ret = ret2;
        }
-        return ret;
+        return -ret;
+}
+/*
+ * If the inode's mapping has pages tagged PAGECACHE_TAG_WRITEBACK, wait on
+ * the mapping with filemap_fdatawait() and return its result negated;
+ * otherwise return 0.  @first and @last are not used by this
+ * implementation: only the mapping is passed to filemap_fdatawait().
+ */
+int
+xfs_wait_on_pages(
+        xfs_inode_t     *ip,
+        xfs_off_t       first,
+        xfs_off_t       last)
+{
+        struct address_space *mapping = VFS_I(ip)->i_mapping;
+        if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+                return -filemap_fdatawait(mapping);
+        return 0;
 }
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e6..2ae8b1ccb02e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
 */
 xfs_param_t xfs_params = {
                          /*    MIN             DFLT            MAX     */
-        .restrict_chown = {     0,              1,              1       },
        .sgid_inherit   = {     0,              0,              1       },
        .symlink_mode   = {     0,              0,              1       },
        .panic_mask     = {     0,              0,              255     },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
        .inherit_nodfrg = {     0,              1,              1       },
        .fstrm_timer    = {     1,              30*100,         3600*100},
 };
-/*
- * Global system credential structure.
- */
-static cred_t sys_cred_val;
-cred_t *sys_cred = &sys_cred_val;
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 6eda8a3eb6f1..69f71caf061c 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
 #define __XFS_GLOBALS_H__
 extern uint64_t xfs_panic_mask;         /* set to cause more panics */
-extern cred_t *sys_cred;
 #endif  /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 281cbd5a25cf..67205f6198ba 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -68,26 +68,22 @@
 * XFS_IOC_PATH_TO_HANDLE
 *    returns full handle for a path
 */
-STATIC int
+int
 xfs_find_handle(
        unsigned int            cmd,
-        void                    __user *arg)
+        xfs_fsop_handlereq_t    *hreq)
 {
        int                     hsize;
        xfs_handle_t            handle;
-        xfs_fsop_handlereq_t    hreq;
        struct inode            *inode;
-        if (copy_from_user(&hreq, arg, sizeof(hreq)))
-                return -XFS_ERROR(EFAULT);
        memset((char *)&handle, 0, sizeof(handle));
        switch (cmd) {
        case XFS_IOC_PATH_TO_FSHANDLE:
        case XFS_IOC_PATH_TO_HANDLE: {
                struct path path;
-                int error = user_lpath((const char __user *)hreq.path, &path);
+                int error = user_lpath((const char __user *)hreq->path, &path);
                if (error)
                        return error;
@@ -101,7 +97,7 @@ xfs_find_handle(
        case XFS_IOC_FD_TO_HANDLE: {
                struct file     *file;
-                file = fget(hreq.fd);
+                file = fget(hreq->fd);
                if (!file)
                    return -EBADF;
@@ -158,8 +154,8 @@ xfs_find_handle(
        }
        /* now copy our handle into the user buffer & write out the size */
-        if (copy_to_user(hreq.ohandle, &handle, hsize) ||
+        if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-            copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
+            copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) {
                iput(inode);
                return -XFS_ERROR(EFAULT);
        }
@@ -249,10 +245,10 @@ xfs_vget_fsop_handlereq(
        return 0;
 }
-STATIC int
+int
 xfs_open_by_handle(
        xfs_mount_t             *mp,
-        void                    __user *arg,
+        xfs_fsop_handlereq_t    *hreq,
        struct file             *parfilp,
        struct inode            *parinode)
 {
@@ -263,14 +259,11 @@ xfs_open_by_handle(
        struct file             *filp;
        struct inode            *inode;
        struct dentry           *dentry;
-        xfs_fsop_handlereq_t    hreq;
        if (!capable(CAP_SYS_ADMIN))
                return -XFS_ERROR(EPERM);
-        if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                return -XFS_ERROR(EFAULT);
-        error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
+        error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
        if (error)
                return -error;
@@ -281,10 +274,10 @@ xfs_open_by_handle(
        }
 #if BITS_PER_LONG != 32
-        hreq.oflags |= O_LARGEFILE;
+        hreq->oflags |= O_LARGEFILE;
 #endif
        /* Put open permission in namei format. */
-        permflag = hreq.oflags;
+        permflag = hreq->oflags;
        if ((permflag+1) & O_ACCMODE)
                permflag++;
        if (permflag & O_TRUNC)
@@ -322,15 +315,16 @@ xfs_open_by_handle(
        mntget(parfilp->f_path.mnt);
        /* Create file pointer. */
-        filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred);
+        filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred);
        if (IS_ERR(filp)) {
                put_unused_fd(new_fd);
                return -XFS_ERROR(-PTR_ERR(filp));
        }
        if (inode->i_mode & S_IFREG) {
                /* invisible operation should not change atime */
                filp->f_flags |= O_NOATIME;
-                filp->f_op = &xfs_invis_file_operations;
+                filp->f_mode |= FMODE_NOCMTIME;
        }
        fd_install(new_fd, filp);
@@ -363,24 +357,21 @@ do_readlink(
 }
-STATIC int
+int
 xfs_readlink_by_handle(
        xfs_mount_t             *mp,
-        void                    __user *arg,
+        xfs_fsop_handlereq_t    *hreq,
        struct inode            *parinode)
 {
        struct inode            *inode;
-        xfs_fsop_handlereq_t    hreq;
        __u32                   olen;
        void                    *link;
        int                     error;
        if (!capable(CAP_SYS_ADMIN))
                return -XFS_ERROR(EPERM);
-        if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                return -XFS_ERROR(EFAULT);
-        error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
+        error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
        if (error)
                return -error;
@@ -390,7 +381,7 @@ xfs_readlink_by_handle(
                goto out_iput;
        }
-        if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
+        if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
                error = -XFS_ERROR(EFAULT);
                goto out_iput;
        }
@@ -402,7 +393,7 @@ xfs_readlink_by_handle(
        error = -xfs_readlink(XFS_I(inode), link);
        if (error)
                goto out_kfree;
-        error = do_readlink(hreq.ohandle, olen, link);
+        error = do_readlink(hreq->ohandle, olen, link);
        if (error)
                goto out_kfree;
@@ -501,7 +492,7 @@ xfs_attrlist_by_handle(
        return -error;
 }
-STATIC int
+int
 xfs_attrmulti_attr_get(
        struct inode            *inode,
        char                    *name,
@@ -530,7 +521,7 @@ xfs_attrmulti_attr_get(
        return error;
 }
-STATIC int
+int
 xfs_attrmulti_attr_set(
        struct inode            *inode,
        char                    *name,
@@ -560,7 +551,7 @@ xfs_attrmulti_attr_set(
        return error;
 }
-STATIC int
+int
 xfs_attrmulti_attr_remove(
        struct inode            *inode,
        char                    *name,
@@ -662,19 +653,26 @@ xfs_attrmulti_by_handle(
        return -error;
 }
-STATIC int
+int
 xfs_ioc_space(
        struct xfs_inode        *ip,
        struct inode            *inode,
        struct file             *filp,
        int                     ioflags,
        unsigned int            cmd,
-        void                    __user *arg)
+        xfs_flock64_t           *bf)
 {
-        xfs_flock64_t           bf;
        int                     attr_flags = 0;
        int                     error;
+        /*
+         * Only allow the sys admin to reserve space unless
+         * unwritten extents are enabled.
+         */
+        if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+            !capable(CAP_SYS_ADMIN))
+                return -XFS_ERROR(EPERM);
        if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
                return -XFS_ERROR(EPERM);
@@ -684,16 +682,12 @@ xfs_ioc_space(
        if (!S_ISREG(inode->i_mode))
                return -XFS_ERROR(EINVAL);
-        if (copy_from_user(&bf, arg, sizeof(bf)))
-                return -XFS_ERROR(EFAULT);
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                attr_flags |= XFS_ATTR_NONBLOCK;
        if (ioflags & IO_INVIS)
                attr_flags |= XFS_ATTR_DMI;
-        error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
+        error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
-                                              NULL, attr_flags);
        return -error;
 }
@@ -1105,10 +1099,6 @@ xfs_ioctl_setattr(
        /*
         * Change file ownership.  Must be the owner or privileged.
-         * If the system was configured with the "restricted_chown"
-         * option, the owner is not permitted to give away the file,
-         * and can change the group id only to a group of which he
-         * or she is a member.
         */
        if (mask & FSX_PROJID) {
                /*
@@ -1137,7 +1127,7 @@ xfs_ioctl_setattr(
                         * the superblock version number since projids didn't
                         * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
                         */
-                        if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+                        if (ip->i_d.di_version == 1)
                                xfs_bump_ino_vers2(tp, ip);
                }
@@ -1256,43 +1246,67 @@ xfs_ioc_setxflags(
 }
 STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+        struct getbmap __user   *base = *ap;
+        /* copy only getbmap portion (not getbmapx) */
+        if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+                return XFS_ERROR(EFAULT);
+        *ap += sizeof(struct getbmap);
+        return 0;
+}
+STATIC int
 xfs_ioc_getbmap(
        struct xfs_inode        *ip,
        int                     ioflags,
        unsigned int            cmd,
        void                    __user *arg)
 {
-        struct getbmap          bm;
+        struct getbmapx         bmx;
-        int                     iflags;
        int                     error;
-        if (copy_from_user(&bm, arg, sizeof(bm)))
+        if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
                return -XFS_ERROR(EFAULT);
-        if (bm.bmv_count < 2)
+        if (bmx.bmv_count < 2)
                return -XFS_ERROR(EINVAL);
-        iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+        bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
        if (ioflags & IO_INVIS)
-                iflags |= BMV_IF_NO_DMAPI_READ;
+                bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-        error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags);
+        error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+                            (struct getbmap *)arg+1);
        if (error)
                return -error;
-        if (copy_to_user(arg, &bm, sizeof(bm)))
+        /* copy back header - only size of getbmap */
+        if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
                return -XFS_ERROR(EFAULT);
        return 0;
 }
 STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+        struct getbmapx __user  *base = *ap;
+        if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+                return XFS_ERROR(EFAULT);
+        *ap += sizeof(struct getbmapx);
+        return 0;
+}
+STATIC int
 xfs_ioc_getbmapx(
        struct xfs_inode        *ip,
        void                    __user *arg)
 {
        struct getbmapx         bmx;
-        struct getbmap          bm;
-        int                     iflags;
        int                     error;
        if (copy_from_user(&bmx, arg, sizeof(bmx)))
@@ -1301,46 +1315,46 @@ xfs_ioc_getbmapx(
        if (bmx.bmv_count < 2)
                return -XFS_ERROR(EINVAL);
-        /*
+        if (bmx.bmv_iflags & (~BMV_IF_VALID))
-         * Map input getbmapx structure to a getbmap
-         * structure for xfs_getbmap.
-         */
-        GETBMAP_CONVERT(bmx, bm);
-        iflags = bmx.bmv_iflags;
-        if (iflags & (~BMV_IF_VALID))
                return -XFS_ERROR(EINVAL);
-        iflags |= BMV_IF_EXTENDED;
+        error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+                            (struct getbmapx *)arg+1);
-        error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags);
        if (error)
                return -error;
-        GETBMAP_CONVERT(bm, bmx);
+        /* copy back header */
+        if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-        if (copy_to_user(arg, &bmx, sizeof(bmx)))
                return -XFS_ERROR(EFAULT);
        return 0;
 }
-int
+/*
-xfs_ioctl(
+ * Note: some of the ioctl's return positive numbers as a
-        xfs_inode_t             *ip,
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
        struct file             *filp,
-        int                     ioflags,
        unsigned int            cmd,
-        void                    __user *arg)
+        unsigned long           p)
 {
        struct inode            *inode = filp->f_path.dentry->d_inode;
-        xfs_mount_t             *mp = ip->i_mount;
+        struct xfs_inode        *ip = XFS_I(inode);
+        struct xfs_mount        *mp = ip->i_mount;
+        void                    __user *arg = (void __user *)p;
+        int                     ioflags = 0;
        int                     error;
-        xfs_itrace_entry(XFS_I(inode));
+        if (filp->f_mode & FMODE_NOCMTIME)
-        switch (cmd) {
+                ioflags |= IO_INVIS;
+        xfs_itrace_entry(ip);
+        switch (cmd) {
        case XFS_IOC_ALLOCSP:
        case XFS_IOC_FREESP:
        case XFS_IOC_RESVSP:
@@ -1348,17 +1362,13 @@ xfs_ioctl(
        case XFS_IOC_ALLOCSP64:
        case XFS_IOC_FREESP64:
        case XFS_IOC_RESVSP64:
-        case XFS_IOC_UNRESVSP64:
+        case XFS_IOC_UNRESVSP64: {
-                /*
+                xfs_flock64_t           bf;
-                 * Only allow the sys admin to reserve space unless
-                 * unwritten extents are enabled.
-                 */
-                if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
-                    !capable(CAP_SYS_ADMIN))
-                        return -EPERM;
-                return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
+                if (copy_from_user(&bf, arg, sizeof(bf)))
+                        return -XFS_ERROR(EFAULT);
+                return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+        }
        case XFS_IOC_DIOINFO: {
                struct dioattr  da;
                xfs_buftarg_t   *target =
@@ -1418,18 +1428,30 @@ xfs_ioctl(
        case XFS_IOC_FD_TO_HANDLE:
        case XFS_IOC_PATH_TO_HANDLE:
-        case XFS_IOC_PATH_TO_FSHANDLE:
+        case XFS_IOC_PATH_TO_FSHANDLE: {
-                return xfs_find_handle(cmd, arg);
+                xfs_fsop_handlereq_t    hreq;
-        case XFS_IOC_OPEN_BY_HANDLE:
+                if (copy_from_user(&hreq, arg, sizeof(hreq)))
-                return xfs_open_by_handle(mp, arg, filp, inode);
+                        return -XFS_ERROR(EFAULT);
+                return xfs_find_handle(cmd, &hreq);
+        }
+        case XFS_IOC_OPEN_BY_HANDLE: {
+                xfs_fsop_handlereq_t    hreq;
+                if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                        return -XFS_ERROR(EFAULT);
+                return xfs_open_by_handle(mp, &hreq, filp, inode);
+        }
        case XFS_IOC_FSSETDM_BY_HANDLE:
                return xfs_fssetdm_by_handle(mp, arg, inode);
-        case XFS_IOC_READLINK_BY_HANDLE:
+        case XFS_IOC_READLINK_BY_HANDLE: {
-                return xfs_readlink_by_handle(mp, arg, inode);
+                xfs_fsop_handlereq_t    hreq;
+                if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                        return -XFS_ERROR(EFAULT);
+                return xfs_readlink_by_handle(mp, &hreq, inode);
+        }
        case XFS_IOC_ATTRLIST_BY_HANDLE:
                return xfs_attrlist_by_handle(mp, arg, inode);
@@ -1437,7 +1459,11 @@ xfs_ioctl(
                return xfs_attrmulti_by_handle(mp, arg, filp, inode);
        case XFS_IOC_SWAPEXT: {
-                error = xfs_swapext((struct xfs_swapext __user *)arg);
+                struct xfs_swapext      sxp;
+                if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_swapext(&sxp);
                return -error;
        }
@@ -1493,9 +1519,6 @@ xfs_ioctl(
        case XFS_IOC_FSGROWFSDATA: {
                xfs_growfs_data_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
@@ -1506,9 +1529,6 @@ xfs_ioctl(
        case XFS_IOC_FSGROWFSLOG: {
                xfs_growfs_log_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
@@ -1519,9 +1539,6 @@ xfs_ioctl(
        case XFS_IOC_FSGROWFSRT: {
                xfs_growfs_rt_t in;
-                if (!capable(CAP_SYS_ADMIN))
-                        return -EPERM;
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
new file mode 100644
index 000000000000..8c16bf2d7e03
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+extern int
+xfs_ioc_space(
+        struct xfs_inode        *ip,
+        struct inode            *inode,
+        struct file             *filp,
+        int                     ioflags,
+        unsigned int            cmd,
+        xfs_flock64_t           *bf);
+extern int
+xfs_find_handle(
+        unsigned int            cmd,
+        xfs_fsop_handlereq_t    *hreq);
+extern int
+xfs_open_by_handle(
+        xfs_mount_t             *mp,
+        xfs_fsop_handlereq_t    *hreq,
+        struct file             *parfilp,
+        struct inode            *parinode);
+extern int
+xfs_readlink_by_handle(
+        xfs_mount_t             *mp,
+        xfs_fsop_handlereq_t    *hreq,
+        struct inode            *parinode);
+extern int
+xfs_attrmulti_attr_get(
+        struct inode            *inode,
+        char                    *name,
+        char                    __user *ubuf,
+        __uint32_t              *len,
+        __uint32_t              flags);
+/*
+ * ubuf is a userspace pointer (__user); len and flags are passed by
+ * value, unlike xfs_attrmulti_attr_get() which takes len by pointer.
+ */
+extern int
+xfs_attrmulti_attr_set(
+        struct inode            *inode,
+        char                    *name,
+        const char              __user *ubuf,
+        __uint32_t              len,
+        __uint32_t              flags);
+extern int
+xfs_attrmulti_attr_remove(
+        struct inode            *inode,
+        char                    *name,
+        __uint32_t              flags);
+extern long
+xfs_file_ioctl(
+        struct file             *filp,
+        unsigned int            cmd,
+        unsigned long           p);
+extern long
+xfs_file_compat_ioctl(
+        struct file             *file,
+        unsigned int            cmd,
+        unsigned long           arg);
+#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index a4b254eb43b2..0504cece9f66 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -16,11 +16,7 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #include <linux/compat.h>
-#include <linux/init.h>
 #include <linux/ioctl.h>
-#include <linux/syscalls.h>
-#include <linux/types.h>
-#include <linux/fs.h>
 #include <asm/uaccess.h>
 #include "xfs.h"
 #include "xfs_fs.h"
@@ -36,7 +32,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dir2_sf.h"
-#include "xfs_vfs.h"
 #include "xfs_vnode.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
@@ -44,221 +39,219 @@
 #include "xfs_error.h"
 #include "xfs_dfrag.h"
 #include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
 #include "xfs_ioctl32.h"
 #define  _NATIVE_IOC(cmd, type) \
          _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#ifdef BROKEN_X86_ALIGNMENT
-#define BROKEN_X86_ALIGNMENT
+STATIC int
-#define _PACKED __attribute__((packed))
+xfs_compat_flock64_copyin(
-/* on ia32 l_start is on a 32-bit boundary */
+        xfs_flock64_t           *bf,
-typedef struct xfs_flock64_32 {
+        compat_xfs_flock64_t    __user *arg32)
-        __s16           l_type;
-        __s16           l_whence;
-        __s64           l_start __attribute__((packed));
-                        /* len == 0 means until end of file */
-        __s64           l_len __attribute__((packed));
-        __s32           l_sysid;
-        __u32           l_pid;
-        __s32           l_pad[4];       /* reserve area */
-} xfs_flock64_32_t;
-#define XFS_IOC_ALLOCSP_32      _IOW ('X', 10, struct xfs_flock64_32)
-#define XFS_IOC_FREESP_32       _IOW ('X', 11, struct xfs_flock64_32)
-#define XFS_IOC_ALLOCSP64_32    _IOW ('X', 36, struct xfs_flock64_32)
-#define XFS_IOC_FREESP64_32     _IOW ('X', 37, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP_32       _IOW ('X', 40, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP_32     _IOW ('X', 41, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP64_32     _IOW ('X', 42, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP64_32   _IOW ('X', 43, struct xfs_flock64_32)
-/* just account for different alignment */
-STATIC unsigned long
-xfs_ioctl32_flock(
-        unsigned long           arg)
 {
-        xfs_flock64_32_t        __user *p32 = (void __user *)arg;
+        if (get_user(bf->l_type,        &arg32->l_type) ||
-        xfs_flock64_t           __user *p = compat_alloc_user_space(sizeof(*p));
+            get_user(bf->l_whence,      &arg32->l_whence) ||
+            get_user(bf->l_start,       &arg32->l_start) ||
-        if (copy_in_user(&p->l_type,    &p32->l_type,   sizeof(s16)) ||
+            get_user(bf->l_len,         &arg32->l_len) ||
-            copy_in_user(&p->l_whence,  &p32->l_whence, sizeof(s16)) ||
+            get_user(bf->l_sysid,       &arg32->l_sysid) ||
-            copy_in_user(&p->l_start,   &p32->l_start,  sizeof(s64)) ||
+            get_user(bf->l_pid,         &arg32->l_pid) ||
-            copy_in_user(&p->l_len,     &p32->l_len,    sizeof(s64)) ||
+            copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
-            copy_in_user(&p->l_sysid,   &p32->l_sysid,  sizeof(s32)) ||
+                return -XFS_ERROR(EFAULT);
-            copy_in_user(&p->l_pid,     &p32->l_pid,    sizeof(u32)) ||
+        return 0;
-            copy_in_user(&p->l_pad,     &p32->l_pad,    4*sizeof(u32)))
-                return -EFAULT;
-        return (unsigned long)p;
 }
-typedef struct compat_xfs_fsop_geom_v1 {
+STATIC int
-        __u32           blocksize;      /* filesystem (data) block size */
+xfs_compat_ioc_fsgeometry_v1(
-        __u32           rtextsize;      /* realtime extent size         */
+        struct xfs_mount          *mp,
-        __u32           agblocks;       /* fsblocks in an AG            */
+        compat_xfs_fsop_geom_v1_t __user *arg32)
-        __u32           agcount;        /* number of allocation groups  */
-        __u32           logblocks;      /* fsblocks in the log          */
-        __u32           sectsize;       /* (data) sector size, bytes    */
-        __u32           inodesize;      /* inode size in bytes          */
-        __u32           imaxpct;        /* max allowed inode space(%)   */
-        __u64           datablocks;     /* fsblocks in data subvolume   */
-        __u64           rtblocks;       /* fsblocks in realtime subvol  */
-        __u64           rtextents;      /* rt extents in realtime subvol*/
-        __u64           logstart;       /* starting fsblock of the log  */
-        unsigned char   uuid[16];       /* unique id of the filesystem  */
-        __u32           sunit;          /* stripe unit, fsblocks        */
-        __u32           swidth;         /* stripe width, fsblocks       */
-        __s32           version;        /* structure version            */
-        __u32           flags;          /* superblock version flags     */
-        __u32           logsectsize;    /* log sector size, bytes       */
-        __u32           rtsectsize;     /* realtime sector size, bytes  */
-        __u32           dirblocksize;   /* directory block size, bytes  */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-#define XFS_IOC_FSGEOMETRY_V1_32  \
-        _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
-STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
 {
-        compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
+        xfs_fsop_geom_t           fsgeo;
-        xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
+        int                       error;
-        if (copy_in_user(p, p32, sizeof(*p32)))
+        error = xfs_fs_geometry(mp, &fsgeo, 3);
-                return -EFAULT;
+        if (error)
-        return (unsigned long)p;
+                return -error;
+        /* The 32-bit variant simply has some padding at the end */
+        if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+                return -XFS_ERROR(EFAULT);
+        return 0;
 }
-typedef struct compat_xfs_inogrp {
+STATIC int
-        __u64           xi_startino;    /* starting inode number        */
+xfs_compat_growfs_data_copyin(
-        __s32           xi_alloccount;  /* # bits set in allocmask      */
+        struct xfs_growfs_data   *in,
-        __u64           xi_allocmask;   /* mask of allocated inodes     */
+        compat_xfs_growfs_data_t __user *arg32)
-} __attribute__((packed)) compat_xfs_inogrp_t;
-STATIC int xfs_inumbers_fmt_compat(
-        void __user *ubuffer,
-        const xfs_inogrp_t *buffer,
-        long count,
-        long *written)
 {
-        compat_xfs_inogrp_t __user *p32 = ubuffer;
+        if (get_user(in->newblocks, &arg32->newblocks) ||
-        long i;
+            get_user(in->imaxpct,   &arg32->imaxpct))
+                return -XFS_ERROR(EFAULT);
+        return 0;
+}
+/*
+ * Fetch the newblocks and extsize members of a compat growfs_rt request
+ * from userspace into the native structure, one member at a time.
+ *
+ * Returns 0 on success or -XFS_ERROR(EFAULT) if either user access fails.
+ */
+STATIC int
+xfs_compat_growfs_rt_copyin(
+        struct xfs_growfs_rt     *in,
+        compat_xfs_growfs_rt_t  __user *arg32)
+{
+        if (get_user(in->newblocks, &arg32->newblocks))
+                goto efault;
+        if (get_user(in->extsize, &arg32->extsize))
+                goto efault;
+        return 0;
+
+efault:
+        return -XFS_ERROR(EFAULT);
+}
+STATIC int
+xfs_inumbers_fmt_compat(
+        void                    __user *ubuffer,
+        const xfs_inogrp_t      *buffer,
+        long                    count,
+        long                    *written)
+{
+        compat_xfs_inogrp_t     __user *p32 = ubuffer;
+        long                    i;
        for (i = 0; i < count; i++) {
                if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
                    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
                    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                        return -EFAULT;
+                        return -XFS_ERROR(EFAULT);
        }
        *written = count * sizeof(*p32);
        return 0;
 }
 #else
 #define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#define _PACKED
+#endif  /* BROKEN_X86_ALIGNMENT */
-#endif
+STATIC int
+xfs_ioctl32_bstime_copyin(
+        xfs_bstime_t            *bstime,
+        compat_xfs_bstime_t     __user *bstime32)
+{
+        compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
-/* XFS_IOC_FSBULKSTAT and friends */
+        if (get_user(sec32,             &bstime32->tv_sec)      ||
+            get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
+                return -XFS_ERROR(EFAULT);
+        bstime->tv_sec = sec32;
+        return 0;
+}
+/*
+ * Copy a compat (32-bit) bulkstat record from userspace into a native
+ * xfs_bstat_t.
+ *
+ * xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere,
+ * so each member is fetched individually rather than with a single
+ * copy_from_user().  The three timestamps go through
+ * xfs_ioctl32_bstime_copyin().  bs_pad is not copied.
+ *
+ * Every native member must be read from the compat member of the same
+ * name; in particular bs_blocks and bs_xflags must not be sourced from
+ * bs_size.
+ *
+ * Returns 0 on success or -XFS_ERROR(EFAULT) if any user access fails.
+ */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+        xfs_bstat_t             *bstat,
+        compat_xfs_bstat_t      __user *bstat32)
+{
+        if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
+            get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
+            get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
+            get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
+            get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
+            get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
+            get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
+            get_user(bstat->bs_size,    &bstat32->bs_size)      ||
+            xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+            xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+            xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+            get_user(bstat->bs_blocks,  &bstat32->bs_blocks)    ||
+            get_user(bstat->bs_xflags,  &bstat32->bs_xflags)    ||
+            get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
+            get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
+            get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
+            get_user(bstat->bs_projid,  &bstat32->bs_projid)    ||
+            get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
+            get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
+            get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+                return -XFS_ERROR(EFAULT);
+        return 0;
+}
-typedef struct compat_xfs_bstime {
+/* XFS_IOC_FSBULKSTAT and friends */
-        __s32           tv_sec;         /* seconds              */
-        __s32           tv_nsec;        /* and nanoseconds      */
-} compat_xfs_bstime_t;
-STATIC int xfs_bstime_store_compat(
+STATIC int
-        compat_xfs_bstime_t __user *p32,
+xfs_bstime_store_compat(
-        const xfs_bstime_t *p)
+        compat_xfs_bstime_t     __user *p32,
+        const xfs_bstime_t      *p)
 {
-        __s32 sec32;
+        __s32                   sec32;
        sec32 = p->tv_sec;
        if (put_user(sec32, &p32->tv_sec) ||
            put_user(p->tv_nsec, &p32->tv_nsec))
-                return -EFAULT;
+                return -XFS_ERROR(EFAULT);
        return 0;
 }
-typedef struct compat_xfs_bstat {
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
-        __u64           bs_ino;         /* inode number                 */
+STATIC int
-        __u16           bs_mode;        /* type and mode                */
+xfs_bulkstat_one_fmt_compat(
-        __u16           bs_nlink;       /* number of links              */
-        __u32           bs_uid;         /* user id                      */
-        __u32           bs_gid;         /* group id                     */
-        __u32           bs_rdev;        /* device value                 */
-        __s32           bs_blksize;     /* block size                   */
-        __s64           bs_size;        /* file size                    */
-        compat_xfs_bstime_t bs_atime;   /* access time                  */
-        compat_xfs_bstime_t bs_mtime;   /* modify time                  */
-        compat_xfs_bstime_t bs_ctime;   /* inode change time            */
-        int64_t         bs_blocks;      /* number of blocks             */
-        __u32           bs_xflags;      /* extended flags               */
-        __s32           bs_extsize;     /* extent size                  */
-        __s32           bs_extents;     /* number of extents            */
-        __u32           bs_gen;         /* generation count             */
-        __u16           bs_projid;      /* project id                   */
-        unsigned char   bs_pad[14];     /* pad space, unused            */
-        __u32           bs_dmevmask;    /* DMIG event mask              */
-        __u16           bs_dmstate;     /* DMIG state info              */
-        __u16           bs_aextents;    /* attribute number of extents  */
-} _PACKED compat_xfs_bstat_t;
-STATIC int xfs_bulkstat_one_fmt_compat(
        void                    __user *ubuffer,
+        int                     ubsize,
+        int                     *ubused,
        const xfs_bstat_t       *buffer)
 {
-        compat_xfs_bstat_t __user *p32 = ubuffer;
+        compat_xfs_bstat_t      __user *p32 = ubuffer;
-        if (put_user(buffer->bs_ino, &p32->bs_ino) ||
+        if (ubsize < sizeof(*p32))
-            put_user(buffer->bs_mode, &p32->bs_mode) ||
+                return XFS_ERROR(ENOMEM);
-            put_user(buffer->bs_nlink, &p32->bs_nlink) ||
-            put_user(buffer->bs_uid, &p32->bs_uid) ||
+        if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
-            put_user(buffer->bs_gid, &p32->bs_gid) ||
+            put_user(buffer->bs_mode,     &p32->bs_mode)        ||
-            put_user(buffer->bs_rdev, &p32->bs_rdev) ||
+            put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
-            put_user(buffer->bs_blksize, &p32->bs_blksize) ||
+            put_user(buffer->bs_uid,      &p32->bs_uid)         ||
-            put_user(buffer->bs_size, &p32->bs_size) ||
+            put_user(buffer->bs_gid,      &p32->bs_gid)         ||
+            put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
+            put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
+            put_user(buffer->bs_size,     &p32->bs_size)        ||
            xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
            xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
            xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
-            put_user(buffer->bs_blocks, &p32->bs_blocks) ||
+            put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
-            put_user(buffer->bs_xflags, &p32->bs_xflags) ||
+            put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
-            put_user(buffer->bs_extsize, &p32->bs_extsize) ||
+            put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
-            put_user(buffer->bs_extents, &p32->bs_extents) ||
+            put_user(buffer->bs_extents,  &p32->bs_extents)     ||
-            put_user(buffer->bs_gen, &p32->bs_gen) ||
+            put_user(buffer->bs_gen,      &p32->bs_gen)         ||
-            put_user(buffer->bs_projid, &p32->bs_projid) ||
+            put_user(buffer->bs_projid,   &p32->bs_projid)      ||
-            put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
+            put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
-            put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
+            put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
            put_user(buffer->bs_aextents, &p32->bs_aextents))
-                return -EFAULT;
+                return XFS_ERROR(EFAULT);
-        return sizeof(*p32);
+        if (ubused)
+                *ubused = sizeof(*p32);
+        return 0;
 }
+STATIC int
+xfs_bulkstat_one_compat(
-typedef struct compat_xfs_fsop_bulkreq {
+        xfs_mount_t     *mp,            /* mount point for filesystem */
-        compat_uptr_t   lastip;         /* last inode # pointer         */
+        xfs_ino_t       ino,            /* inode number to get data for */
-        __s32           icount;         /* count of entries in buffer   */
+        void            __user *buffer, /* buffer to place output in */
-        compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
+        int             ubsize,         /* size of buffer */
-        compat_uptr_t   ocount;         /* output count pointer         */
+        void            *private_data,  /* my private data */
-} compat_xfs_fsop_bulkreq_t;
+        xfs_daddr_t     bno,            /* starting bno of inode cluster */
+        int             *ubused,        /* bytes used by me */
-#define XFS_IOC_FSBULKSTAT_32 \
+        void            *dibuff,        /* on-disk inode buffer */
-        _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+        int             *stat)          /* BULKSTAT_RV_... */
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+{
-        _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+        return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-#define XFS_IOC_FSINUMBERS_32 \
+                                    xfs_bulkstat_one_fmt_compat, bno,
-        _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+                                    ubused, dibuff, stat);
+}
 /* copied from xfs_ioctl.c */
 STATIC int
-xfs_ioc_bulkstat_compat(
+xfs_compat_ioc_bulkstat(
-        xfs_mount_t             *mp,
+        xfs_mount_t               *mp,
-        unsigned int            cmd,
+        unsigned int              cmd,
-        void                    __user *arg)
+        compat_xfs_fsop_bulkreq_t __user *p32)
 {
-        compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
        u32                     addr;
        xfs_fsop_bulkreq_t      bulkreq;
        int                     count;  /* # of records returned */
@@ -270,20 +263,20 @@ xfs_ioc_bulkstat_compat(
        /* should be called again (unused here, but used in dmapi) */
        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
+                return -XFS_ERROR(EPERM);
        if (XFS_FORCED_SHUTDOWN(mp))
                return -XFS_ERROR(EIO);
        if (get_user(addr, &p32->lastip))
-                return -EFAULT;
+                return -XFS_ERROR(EFAULT);
        bulkreq.lastip = compat_ptr(addr);
        if (get_user(bulkreq.icount, &p32->icount) ||
            get_user(addr, &p32->ubuffer))
-                return -EFAULT;
+                return -XFS_ERROR(EFAULT);
        bulkreq.ubuffer = compat_ptr(addr);
        if (get_user(addr, &p32->ocount))
-                return -EFAULT;
+                return -XFS_ERROR(EFAULT);
        bulkreq.ocount = compat_ptr(addr);
        if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
@@ -295,17 +288,22 @@ xfs_ioc_bulkstat_compat(
        if (bulkreq.ubuffer == NULL)
                return -XFS_ERROR(EINVAL);
-        if (cmd == XFS_IOC_FSINUMBERS)
+        if (cmd == XFS_IOC_FSINUMBERS_32) {
                error = xfs_inumbers(mp, &inlast, &count,
                                bulkreq.ubuffer, xfs_inumbers_fmt_compat);
-        else {
+        } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-                /* declare a var to get a warning in case the type changes */
+                int res;
-                bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
+                error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+                                sizeof(compat_xfs_bstat_t),
+                                NULL, 0, NULL, NULL, &res);
+        } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
                error = xfs_bulkstat(mp, &inlast, &count,
-                        xfs_bulkstat_one, formatter,
+                        xfs_bulkstat_one_compat, NULL,
                        sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
                        BULKSTAT_FG_QUICK, &done);
-        }
+        } else
+                error = XFS_ERROR(EINVAL);
        if (error)
                return -error;
@@ -321,63 +319,306 @@ xfs_ioc_bulkstat_compat(
        return 0;
 }
+STATIC int
+xfs_compat_handlereq_copyin(
+        xfs_fsop_handlereq_t            *hreq,
+        compat_xfs_fsop_handlereq_t     __user *arg32)
+{
+        compat_xfs_fsop_handlereq_t     hreq32;
+        if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+                return -XFS_ERROR(EFAULT);
+        hreq->fd = hreq32.fd;
+        hreq->path = compat_ptr(hreq32.path);
+        hreq->oflags = hreq32.oflags;
+        hreq->ihandle = compat_ptr(hreq32.ihandle);
+        hreq->ihandlen = hreq32.ihandlen;
+        hreq->ohandle = compat_ptr(hreq32.ohandle);
+        hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+        return 0;
+}
-typedef struct compat_xfs_fsop_handlereq {
+/*
-        __u32           fd;             /* fd for FD_TO_HANDLE          */
+ * Convert userspace handle data into inode.
-        compat_uptr_t   path;           /* user pathname                */
+ *
-        __u32           oflags;         /* open flags                   */
+ * We use the fact that all the fsop_handlereq ioctl calls have a data
-        compat_uptr_t   ihandle;        /* user supplied handle         */
+ * structure argument whose first component is always a xfs_fsop_handlereq_t,
-        __u32           ihandlen;       /* user supplied length         */
+ * so we can pass that sub structure into this handy, shared routine.
-        compat_uptr_t   ohandle;        /* user buffer for handle       */
+ *
-        compat_uptr_t   ohandlen;       /* user buffer length           */
+ * If no error, caller must always iput the returned inode.
-} compat_xfs_fsop_handlereq_t;
+ */
+STATIC int
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+xfs_vget_fsop_handlereq_compat(
-        _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+        xfs_mount_t             *mp,
-#define XFS_IOC_PATH_TO_HANDLE_32 \
+        struct inode            *parinode,      /* parent inode pointer    */
-        _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+        compat_xfs_fsop_handlereq_t     *hreq,
-#define XFS_IOC_FD_TO_HANDLE_32 \
+        struct inode            **inode)
-        _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
-        _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
-        _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
 {
-        compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
+        void                    __user *hanp;
-        xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
+        size_t                  hlen;
-        u32 addr;
+        xfs_fid_t               *xfid;
+        xfs_handle_t            *handlep;
-        if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
+        xfs_handle_t            handle;
-            get_user(addr, &p32->path) ||
+        xfs_inode_t             *ip;
-            put_user(compat_ptr(addr), &p->path) ||
+        xfs_ino_t               ino;
-            copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
+        __u32                   igen;
-            get_user(addr, &p32->ihandle) ||
+        int                     error;
-            put_user(compat_ptr(addr), &p->ihandle) ||
-            copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
+        /*
-            get_user(addr, &p32->ohandle) ||
+         * Only allow handle opens under a directory.
-            put_user(compat_ptr(addr), &p->ohandle) ||
+         */
-            get_user(addr, &p32->ohandlen) ||
+        if (!S_ISDIR(parinode->i_mode))
-            put_user(compat_ptr(addr), &p->ohandlen))
+                return XFS_ERROR(ENOTDIR);
-                return -EFAULT;
+        hanp = compat_ptr(hreq->ihandle);
-        return (unsigned long)p;
+        hlen = hreq->ihandlen;
+        handlep = &handle;
+        if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
+                return XFS_ERROR(EINVAL);
+        if (copy_from_user(handlep, hanp, hlen))
+                return XFS_ERROR(EFAULT);
+        if (hlen < sizeof(*handlep))
+                memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
+        if (hlen > sizeof(handlep->ha_fsid)) {
+                if (handlep->ha_fid.fid_len !=
+                    (hlen - sizeof(handlep->ha_fsid) -
+                            sizeof(handlep->ha_fid.fid_len)) ||
+                    handlep->ha_fid.fid_pad)
+                        return XFS_ERROR(EINVAL);
+        }
+        /*
+         * Crack the handle, obtain the inode # & generation #
+         */
+        xfid = (struct xfs_fid *)&handlep->ha_fid;
+        if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
+                ino  = xfid->fid_ino;
+                igen = xfid->fid_gen;
+        } else {
+                return XFS_ERROR(EINVAL);
+        }
+        /*
+         * Get the XFS inode, building a Linux inode to go with it.
+         */
+        error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
+        if (error)
+                return error;
+        if (ip == NULL)
+                return XFS_ERROR(EIO);
+        if (ip->i_d.di_gen != igen) {
+                xfs_iput_new(ip, XFS_ILOCK_SHARED);
+                return XFS_ERROR(ENOENT);
+        }
+        xfs_iunlock(ip, XFS_ILOCK_SHARED);
+        *inode = VFS_I(ip);
+        return 0;
 }
+STATIC int
+xfs_compat_attrlist_by_handle(
+        xfs_mount_t             *mp,
+        void                    __user *arg,
+        struct inode            *parinode)
+{
+        int                     error;
+        attrlist_cursor_kern_t  *cursor;
+        compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+        struct inode            *inode;
+        char                    *kbuf;
+        if (!capable(CAP_SYS_ADMIN))
+                return -XFS_ERROR(EPERM);
+        if (copy_from_user(&al_hreq, arg,
+                           sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+                return -XFS_ERROR(EFAULT);
+        if (al_hreq.buflen > XATTR_LIST_MAX)
+                return -XFS_ERROR(EINVAL);
+        /*
+         * Reject flags, only allow namespaces.
+         */
+        if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+                return -XFS_ERROR(EINVAL);
+        error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq,
+                                               &inode);
+        if (error)
+                goto out;
+        kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+        if (!kbuf)
+                goto out_vn_rele;
+        cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+        error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen,
+                                        al_hreq.flags, cursor);
+        if (error)
+                goto out_kfree;
+        if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+                error = -EFAULT;
+ out_kfree:
+        kfree(kbuf);
+ out_vn_rele:
+        iput(inode);
+ out:
+        return -error;
+}
-STATIC long
+STATIC int
-xfs_compat_ioctl(
+xfs_compat_attrmulti_by_handle(
-        int             mode,
+        xfs_mount_t                             *mp,
-        struct file     *file,
+        void                                    __user *arg,
-        unsigned        cmd,
+        struct inode                            *parinode)
-        unsigned long   arg)
+{
+        int                                     error;
+        compat_xfs_attr_multiop_t               *ops;
+        compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
+        struct inode                            *inode;
+        unsigned int                            i, size;
+        char                                    *attr_name;
+        if (!capable(CAP_SYS_ADMIN))
+                return -XFS_ERROR(EPERM);
+        if (copy_from_user(&am_hreq, arg,
+                           sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+                return -XFS_ERROR(EFAULT);
+        error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq,
+                                               &inode);
+        if (error)
+                goto out;
+        error = E2BIG;
+        size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+        if (!size || size > 16 * PAGE_SIZE)
+                goto out_vn_rele;
+        error = ENOMEM;
+        ops = kmalloc(size, GFP_KERNEL);
+        if (!ops)
+                goto out_vn_rele;
+        error = EFAULT;
+        if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
+                goto out_kfree_ops;
+        attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+        if (!attr_name)
+                goto out_kfree_ops;
+        error = 0;
+        for (i = 0; i < am_hreq.opcount; i++) {
+                ops[i].am_error = strncpy_from_user(attr_name,
+                                compat_ptr(ops[i].am_attrname),
+                                MAXNAMELEN);
+                if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                        error = -ERANGE;
+                if (ops[i].am_error < 0)
+                        break;
+                switch (ops[i].am_opcode) {
+                case ATTR_OP_GET:
+                        ops[i].am_error = xfs_attrmulti_attr_get(inode,
+                                        attr_name,
+                                        compat_ptr(ops[i].am_attrvalue),
+                                        &ops[i].am_length, ops[i].am_flags);
+                        break;
+                case ATTR_OP_SET:
+                        ops[i].am_error = xfs_attrmulti_attr_set(inode,
+                                        attr_name,
+                                        compat_ptr(ops[i].am_attrvalue),
+                                        ops[i].am_length, ops[i].am_flags);
+                        break;
+                case ATTR_OP_REMOVE:
+                        ops[i].am_error = xfs_attrmulti_attr_remove(inode,
+                                        attr_name, ops[i].am_flags);
+                        break;
+                default:
+                        ops[i].am_error = EINVAL;
+                }
+        }
+        if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+                error = XFS_ERROR(EFAULT);
+        kfree(attr_name);
+ out_kfree_ops:
+        kfree(ops);
+ out_vn_rele:
+        iput(inode);
+ out:
+        return -error;
+}
+STATIC int
+xfs_compat_fssetdm_by_handle(
+        xfs_mount_t             *mp,
+        void                    __user *arg,
+        struct inode            *parinode)
+{
+        int                     error;
+        struct fsdmidata        fsd;
+        compat_xfs_fsop_setdm_handlereq_t dmhreq;
+        struct inode            *inode;
+        if (!capable(CAP_MKNOD))
+                return -XFS_ERROR(EPERM);
+        if (copy_from_user(&dmhreq, arg,
+                           sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+                return -XFS_ERROR(EFAULT);
+        error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq,
+                                               &inode);
+        if (error)
+                return -error;
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                error = -XFS_ERROR(EPERM);
+                goto out;
+        }
+        if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+                error = -XFS_ERROR(EFAULT);
+                goto out;
+        }
+        error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask,
+                                 fsd.fsd_dmstate);
+out:
+        iput(inode);
+        return error;
+}
+long
+xfs_file_compat_ioctl(
+        struct file             *filp,
+        unsigned                cmd,
+        unsigned long           p)
 {
-        struct inode    *inode = file->f_path.dentry->d_inode;
+        struct inode            *inode = filp->f_path.dentry->d_inode;
-        int             error;
+        struct xfs_inode        *ip = XFS_I(inode);
+        struct xfs_mount        *mp = ip->i_mount;
+        void                    __user *arg = (void __user *)p;
+        int                     ioflags = 0;
+        int                     error;
+        if (filp->f_mode & FMODE_NOCMTIME)
+                ioflags |= IO_INVIS;
+        xfs_itrace_entry(ip);
        switch (cmd) {
+        /* No size or alignment issues on any arch */
        case XFS_IOC_DIOINFO:
        case XFS_IOC_FSGEOMETRY:
        case XFS_IOC_FSGETXATTR:
@@ -387,48 +628,18 @@ xfs_compat_ioctl(
        case XFS_IOC_GETBMAP:
        case XFS_IOC_GETBMAPA:
        case XFS_IOC_GETBMAPX:
-/* not handled
-        case XFS_IOC_FSSETDM_BY_HANDLE:
-        case XFS_IOC_ATTRLIST_BY_HANDLE:
-        case XFS_IOC_ATTRMULTI_BY_HANDLE:
-*/
        case XFS_IOC_FSCOUNTS:
        case XFS_IOC_SET_RESBLKS:
        case XFS_IOC_GET_RESBLKS:
-        case XFS_IOC_FSGROWFSDATA:
        case XFS_IOC_FSGROWFSLOG:
-        case XFS_IOC_FSGROWFSRT:
        case XFS_IOC_FREEZE:
        case XFS_IOC_THAW:
        case XFS_IOC_GOINGDOWN:
        case XFS_IOC_ERROR_INJECTION:
        case XFS_IOC_ERROR_CLEARALL:
-                break;
+                return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
-        case XFS_IOC32_GETXFLAGS:
+        /* These are handled fine if no alignment issues */
-        case XFS_IOC32_SETXFLAGS:
-        case XFS_IOC32_GETVERSION:
-                cmd = _NATIVE_IOC(cmd, long);
-                break;
-#ifdef BROKEN_X86_ALIGNMENT
-        /* xfs_flock_t has wrong u32 vs u64 alignment */
-        case XFS_IOC_ALLOCSP_32:
-        case XFS_IOC_FREESP_32:
-        case XFS_IOC_ALLOCSP64_32:
-        case XFS_IOC_FREESP64_32:
-        case XFS_IOC_RESVSP_32:
-        case XFS_IOC_UNRESVSP_32:
-        case XFS_IOC_RESVSP64_32:
-        case XFS_IOC_UNRESVSP64_32:
-                arg = xfs_ioctl32_flock(arg);
-                cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-                break;
-        case XFS_IOC_FSGEOMETRY_V1_32:
-                arg = xfs_ioctl32_geom_v1(arg);
-                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
-                break;
-#else /* These are handled fine if no alignment issues */
        case XFS_IOC_ALLOCSP:
        case XFS_IOC_FREESP:
        case XFS_IOC_RESVSP:
@@ -438,51 +649,97 @@ xfs_compat_ioctl(
        case XFS_IOC_RESVSP64:
        case XFS_IOC_UNRESVSP64:
        case XFS_IOC_FSGEOMETRY_V1:
-                break;
+        case XFS_IOC_FSGROWFSDATA:
+        case XFS_IOC_FSGROWFSRT:
+                return xfs_file_ioctl(filp, cmd, p);
+#else
+        case XFS_IOC_ALLOCSP_32:
+        case XFS_IOC_FREESP_32:
+        case XFS_IOC_ALLOCSP64_32:
+        case XFS_IOC_FREESP64_32:
+        case XFS_IOC_RESVSP_32:
+        case XFS_IOC_UNRESVSP_32:
+        case XFS_IOC_RESVSP64_32:
+        case XFS_IOC_UNRESVSP64_32: {
+                struct xfs_flock64      bf;
-        /* xfs_bstat_t still has wrong u32 vs u64 alignment */
+                if (xfs_compat_flock64_copyin(&bf, arg))
-        case XFS_IOC_SWAPEXT:
+                        return -XFS_ERROR(EFAULT);
-                break;
+                cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+                return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+        }
+        case XFS_IOC_FSGEOMETRY_V1_32:
+                return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+        case XFS_IOC_FSGROWFSDATA_32: {
+                struct xfs_growfs_data  in;
+                if (xfs_compat_growfs_data_copyin(&in, arg))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_growfs_data(mp, &in);
+                return -error;
+        }
+        case XFS_IOC_FSGROWFSRT_32: {
+                struct xfs_growfs_rt    in;
+                if (xfs_compat_growfs_rt_copyin(&in, arg))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_growfs_rt(mp, &in);
+                return -error;
+        }
 #endif
+        /* long changes size, but xfs only copies out 32 bits */
+        case XFS_IOC_GETXFLAGS_32:
+        case XFS_IOC_SETXFLAGS_32:
+        case XFS_IOC_GETVERSION_32:
+                cmd = _NATIVE_IOC(cmd, long);
+                return xfs_file_ioctl(filp, cmd, p);
+        case XFS_IOC_SWAPEXT: {
+                struct xfs_swapext        sxp;
+                struct compat_xfs_swapext __user *sxu = arg;
+                /* Bulk copy in up to the sx_stat field, then copy bstat */
+                if (copy_from_user(&sxp, sxu,
+                                   offsetof(struct xfs_swapext, sx_stat)) ||
+                    xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+                        return -XFS_ERROR(EFAULT);
+                error = xfs_swapext(&sxp);
+                return -error;
+        }
        case XFS_IOC_FSBULKSTAT_32:
        case XFS_IOC_FSBULKSTAT_SINGLE_32:
        case XFS_IOC_FSINUMBERS_32:
-                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
+                return xfs_compat_ioc_bulkstat(mp, cmd, arg);
-                return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount,
-                                cmd, (void __user*)arg);
        case XFS_IOC_FD_TO_HANDLE_32:
        case XFS_IOC_PATH_TO_HANDLE_32:
-        case XFS_IOC_PATH_TO_FSHANDLE_32:
+        case XFS_IOC_PATH_TO_FSHANDLE_32: {
-        case XFS_IOC_OPEN_BY_HANDLE_32:
+                struct xfs_fsop_handlereq       hreq;
-        case XFS_IOC_READLINK_BY_HANDLE_32:
-                arg = xfs_ioctl32_fshandle(arg);
+                if (xfs_compat_handlereq_copyin(&hreq, arg))
+                        return -XFS_ERROR(EFAULT);
                cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
-                break;
+                return xfs_find_handle(cmd, &hreq);
-        default:
-                return -ENOIOCTLCMD;
        }
+        case XFS_IOC_OPEN_BY_HANDLE_32: {
+                struct xfs_fsop_handlereq       hreq;
-        error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg);
+                if (xfs_compat_handlereq_copyin(&hreq, arg))
-        xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
+                        return -XFS_ERROR(EFAULT);
+                return xfs_open_by_handle(mp, &hreq, filp, inode);
-        return error;
+        }
-}
+        case XFS_IOC_READLINK_BY_HANDLE_32: {
+                struct xfs_fsop_handlereq       hreq;
-long
-xfs_file_compat_ioctl(
-        struct file             *file,
-        unsigned                cmd,
-        unsigned long           arg)
-{
-        return xfs_compat_ioctl(0, file, cmd, arg);
-}
-long
+                if (xfs_compat_handlereq_copyin(&hreq, arg))
-xfs_file_compat_invis_ioctl(
+                        return -XFS_ERROR(EFAULT);
-        struct file             *file,
+                return xfs_readlink_by_handle(mp, &hreq, inode);
-        unsigned                cmd,
+        }
-        unsigned long           arg)
+        case XFS_IOC_ATTRLIST_BY_HANDLE_32:
-{
+                return xfs_compat_attrlist_by_handle(mp, arg, inode);
-        return xfs_compat_ioctl(IO_INVIS, file, cmd, arg);
+        case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+                return xfs_compat_attrmulti_by_handle(mp, arg, inode);
+        case XFS_IOC_FSSETDM_BY_HANDLE_32:
+                return xfs_compat_fssetdm_by_handle(mp, arg, inode);
+        default:
+                return -XFS_ERROR(ENOIOCTLCMD);
+        }
 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 02de6e62ee37..1024c4f8ba0d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -18,7 +18,217 @@
 #ifndef __XFS_IOCTL32_H__
 #define __XFS_IOCTL32_H__
-extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long);
+#include <linux/compat.h>
-extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long);
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment.  We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32    FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32    FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32   FS_IOC32_GETVERSION
+/*
+ * On intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
+typedef struct compat_xfs_bstime {
+        compat_time_t   tv_sec;         /* seconds              */
+        __s32           tv_nsec;        /* and nanoseconds      */
+} compat_xfs_bstime_t;
+typedef struct compat_xfs_bstat {
+        __u64           bs_ino;         /* inode number                 */
+        __u16           bs_mode;        /* type and mode                */
+        __u16           bs_nlink;       /* number of links              */
+        __u32           bs_uid;         /* user id                      */
+        __u32           bs_gid;         /* group id                     */
+        __u32           bs_rdev;        /* device value                 */
+        __s32           bs_blksize;     /* block size                   */
+        __s64           bs_size;        /* file size                    */
+        compat_xfs_bstime_t bs_atime;   /* access time                  */
+        compat_xfs_bstime_t bs_mtime;   /* modify time                  */
+        compat_xfs_bstime_t bs_ctime;   /* inode change time            */
+        int64_t         bs_blocks;      /* number of blocks             */
+        __u32           bs_xflags;      /* extended flags               */
+        __s32           bs_extsize;     /* extent size                  */
+        __s32           bs_extents;     /* number of extents            */
+        __u32           bs_gen;         /* generation count             */
+        __u16           bs_projid;      /* project id                   */
+        unsigned char   bs_pad[14];     /* pad space, unused            */
+        __u32           bs_dmevmask;    /* DMIG event mask              */
+        __u16           bs_dmstate;     /* DMIG state info              */
+        __u16           bs_aextents;    /* attribute number of extents  */
+} __compat_packed compat_xfs_bstat_t;
+typedef struct compat_xfs_fsop_bulkreq {
+        compat_uptr_t   lastip;         /* last inode # pointer         */
+        __s32           icount;         /* count of entries in buffer   */
+        compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
+        compat_uptr_t   ocount;         /* output count pointer         */
+} compat_xfs_fsop_bulkreq_t;
+#define XFS_IOC_FSBULKSTAT_32 \
+        _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+        _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+        _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+typedef struct compat_xfs_fsop_handlereq {
+        __u32           fd;             /* fd for FD_TO_HANDLE          */
+        compat_uptr_t   path;           /* user pathname                */
+        __u32           oflags;         /* open flags                   */
+        compat_uptr_t   ihandle;        /* user supplied handle         */
+        __u32           ihandlen;       /* user supplied length         */
+        compat_uptr_t   ohandle;        /* user buffer for handle       */
+        compat_uptr_t   ohandlen;       /* user buffer length           */
+} compat_xfs_fsop_handlereq_t;
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+        _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+        _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+        _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+        _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+        _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+        __int64_t               sx_version;     /* version */
+        __int64_t               sx_fdtarget;    /* fd of target file */
+        __int64_t               sx_fdtmp;       /* fd of tmp file */
+        xfs_off_t               sx_offset;      /* offset into file */
+        xfs_off_t               sx_length;      /* length from offset */
+        char                    sx_pad[16];     /* pad space, unused */
+        compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+#define XFS_IOC_SWAPEXT_32      _IOWR('X', 109, struct compat_xfs_swapext)
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+        struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+        struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
+        __u32                           flags;  /* which namespace to use */
+        __u32                           buflen; /* length of buffer supplied */
+        compat_uptr_t                   buffer; /* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+        _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+        __u32           am_opcode;
+        __s32           am_error;
+        compat_uptr_t   am_attrname;
+        compat_uptr_t   am_attrvalue;
+        __u32           am_length;
+        __u32           am_flags;
+} compat_xfs_attr_multiop_t;
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+        struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+        __u32                           opcount;/* count of following multiop */
+        /* ptr to compat_xfs_attr_multiop */
+        compat_uptr_t                   ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+        _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+typedef struct compat_xfs_fsop_setdm_handlereq {
+        struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
+        /* ptr to struct fsdmidata */
+        compat_uptr_t                   data;   /* DMAPI data   */
+} compat_xfs_fsop_setdm_handlereq_t;
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+        _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+        __s16           l_type;
+        __s16           l_whence;
+        __s64           l_start __attribute__((packed));
+                        /* len == 0 means until end of file */
+        __s64           l_len __attribute__((packed));
+        __s32           l_sysid;
+        __u32           l_pid;
+        __s32           l_pad[4];       /* reserve area */
+} compat_xfs_flock64_t;
+#define XFS_IOC_ALLOCSP_32      _IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32       _IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32    _IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32     _IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32       _IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32     _IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32     _IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32   _IOW('X', 43, struct compat_xfs_flock64)
+typedef struct compat_xfs_fsop_geom_v1 {
+        __u32           blocksize;      /* filesystem (data) block size */
+        __u32           rtextsize;      /* realtime extent size         */
+        __u32           agblocks;       /* fsblocks in an AG            */
+        __u32           agcount;        /* number of allocation groups  */
+        __u32           logblocks;      /* fsblocks in the log          */
+        __u32           sectsize;       /* (data) sector size, bytes    */
+        __u32           inodesize;      /* inode size in bytes          */
+        __u32           imaxpct;        /* max allowed inode space(%)   */
+        __u64           datablocks;     /* fsblocks in data subvolume   */
+        __u64           rtblocks;       /* fsblocks in realtime subvol  */
+        __u64           rtextents;      /* rt extents in realtime subvol*/
+        __u64           logstart;       /* starting fsblock of the log  */
+        unsigned char   uuid[16];       /* unique id of the filesystem  */
+        __u32           sunit;          /* stripe unit, fsblocks        */
+        __u32           swidth;         /* stripe width, fsblocks       */
+        __s32           version;        /* structure version            */
+        __u32           flags;          /* superblock version flags     */
+        __u32           logsectsize;    /* log sector size, bytes       */
+        __u32           rtsectsize;     /* realtime sector size, bytes  */
+        __u32           dirblocksize;   /* directory block size, bytes  */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+        _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+typedef struct compat_xfs_inogrp {
+        __u64           xi_startino;    /* starting inode number        */
+        __s32           xi_alloccount;  /* # bits set in allocmask      */
+        __u64           xi_allocmask;   /* mask of allocated inodes     */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+        __u64           newblocks;      /* new data subvol size, fsblocks */
+        __u32           imaxpct;        /* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+typedef struct compat_xfs_growfs_rt {
+        __u64           newblocks;      /* new realtime size, fsblocks */
+        __u32           extsize;        /* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
+#endif /* BROKEN_X86_ALIGNMENT */
 #endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f3434..7aa53fefc67f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -53,6 +53,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/falloc.h>
+#include <linux/fiemap.h>
 /*
 * Bring the atime in the XFS inode uptodate.
@@ -64,14 +65,14 @@ xfs_synchronize_atime(
 {
        struct inode    *inode = VFS_I(ip);
-        if (inode) {
+        if (!(inode->i_state & I_CLEAR)) {
                ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
                ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
        }
 }
 /*
- * If the linux inode exists, mark it dirty.
+ * If the linux inode is valid, mark it dirty.
 * Used when commiting a dirty inode into a transaction so that
 * the inode will get written back by the linux code
 */
@@ -81,7 +82,7 @@ xfs_mark_inode_dirty_sync(
 {
        struct inode    *inode = VFS_I(ip);
-        if (inode)
+        if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
                mark_inode_dirty_sync(inode);
 }
@@ -128,7 +129,7 @@ xfs_ichgtime(
        if (sync_it) {
                SYNCHRONIZE();
                ip->i_update_core = 1;
-                mark_inode_dirty_sync(inode);
+                xfs_mark_inode_dirty_sync(ip);
        }
 }
@@ -158,8 +159,6 @@ xfs_init_security(
        }
        error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-        if (!error)
-                xfs_iflags_set(ip, XFS_IMODIFIED);
        kfree(name);
        kfree(value);
@@ -260,7 +259,6 @@ xfs_vn_mknod(
                error = _ACL_INHERIT(inode, mode, default_acl);
                if (unlikely(error))
                        goto out_cleanup_inode;
-                xfs_iflags_set(ip, XFS_IMODIFIED);
                _ACL_FREE(default_acl);
        }
@@ -366,21 +364,17 @@ xfs_vn_link(
        struct inode    *dir,
        struct dentry   *dentry)
 {
-        struct inode    *inode; /* inode of guy being linked to */
+        struct inode    *inode = old_dentry->d_inode;
        struct xfs_name name;
        int             error;
-        inode = old_dentry->d_inode;
        xfs_dentry_to_name(&name, dentry);
-        igrab(inode);
        error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
-        if (unlikely(error)) {
+        if (unlikely(error))
-                iput(inode);
                return -error;
-        }
-        xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
+        atomic_inc(&inode->i_count);
        d_instantiate(dentry, inode);
        return 0;
 }
@@ -601,7 +595,7 @@ xfs_vn_setattr(
        struct dentry   *dentry,
        struct iattr    *iattr)
 {
-        return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
+        return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 /*
@@ -642,7 +636,7 @@ xfs_vn_fallocate(
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
        error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
-                                      0, NULL, XFS_ATTR_NOLOCK);
+                                      0, XFS_ATTR_NOLOCK);
        if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
            offset + len > i_size_read(inode))
                new_size = offset + len;
@@ -653,7 +647,7 @@ xfs_vn_fallocate(
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
-                error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
+                error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
        }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -661,6 +655,88 @@ out_error:
        return error;
 }
+#define XFS_FIEMAP_FLAGS        (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+/*
+ * Formatter callback handed to xfs_getbmap(): translate one getbmapx
+ * record into a fiemap extent and pass it to fiemap_fill_next_extent().
+ *
+ * Holes (bmv_block == -1) are skipped.  Offsets and lengths are converted
+ * from 512-byte basic blocks to bytes.  When the fiemap helper returns a
+ * positive value, *full is set so that the caller stops iterating.
+ *
+ * Returns 0 on success or a positive errno, as xfs_getbmap expects.
+ */
+STATIC int
+xfs_fiemap_format(
+        void                    **arg,
+        struct getbmapx         *bmv,
+        int                     *full)
+{
+        struct fiemap_extent_info *info = *arg;
+        u64                     fe_logical, fe_physical, fe_length;
+        u32                     fe_flags = 0;
+        int                     rc;
+        /* A hole has nothing to report. */
+        if (bmv->bmv_block == -1LL)
+                return 0;
+        fe_logical = BBTOB(bmv->bmv_offset);
+        fe_physical = BBTOB(bmv->bmv_block);
+        fe_length = BBTOB(bmv->bmv_length);
+        if (bmv->bmv_oflags & BMV_OF_PREALLOC) {
+                fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
+        } else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+                /* Delayed allocation: no disk block assigned yet. */
+                fe_flags |= FIEMAP_EXTENT_DELALLOC;
+                fe_physical = 0;
+        }
+        if (bmv->bmv_oflags & BMV_OF_LAST)
+                fe_flags |= FIEMAP_EXTENT_LAST;
+        rc = fiemap_fill_next_extent(info, fe_logical, fe_physical,
+                                     fe_length, fe_flags);
+        /* Zero or a negative errno: hand back as a positive XFS error. */
+        if (rc <= 0)
+                return -rc;
+        /* Positive return: the user extent array is now full. */
+        *full = 1;
+        return 0;
+}
+/*
+ * ->fiemap inode operation: report the extents backing the byte range
+ * [start, start + length) of @inode through @fieinfo.
+ *
+ * The byte range is converted to 512-byte basic blocks for xfs_getbmap().
+ * The start is truncated and the end is rounded up, so every basic block
+ * touched by the request is covered even when start/length are unaligned.
+ *
+ * Returns 0 on success.  A failure from fiemap_check_flags() is passed
+ * through unchanged; a (positive) xfs_getbmap() error is negated.
+ */
+STATIC int
+xfs_vn_fiemap(
+        struct inode            *inode,
+        struct fiemap_extent_info *fieinfo,
+        u64                     start,
+        u64                     length)
+{
+        xfs_inode_t             *ip = XFS_I(inode);
+        struct getbmapx         bm;
+        int                     error;
+        error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+        if (error)
+                return error;
+        /*
+         * Set up bmap header for xfs internal routine.  Truncate the start
+         * to a basic block boundary: rounding it up would skip the block
+         * that contains an unaligned start offset.
+         */
+        bm.bmv_offset = BTOBBT(start);
+        /* Special case for whole file */
+        if (length == FIEMAP_MAX_OFFSET)
+                bm.bmv_length = -1LL;
+        else
+                bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
+        /* our formatter will tell xfs_getbmap when to stop. */
+        bm.bmv_count = MAXEXTNUM;
+        bm.bmv_iflags = BMV_IF_PREALLOC;
+        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+                bm.bmv_iflags |= BMV_IF_ATTRFORK;
+        /* Without FIEMAP_FLAG_SYNC, ask for delayed-allocation extents too. */
+        if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+                bm.bmv_iflags |= BMV_IF_DELALLOC;
+        error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+        if (error)
+                return -error;
+        return 0;
+}
 static const struct inode_operations xfs_inode_operations = {
        .permission             = xfs_vn_permission,
        .truncate               = xfs_vn_truncate,
@@ -671,6 +747,7 @@ static const struct inode_operations xfs_inode_operations = {
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .fallocate              = xfs_vn_fallocate,
+        .fiemap                 = xfs_vn_fiemap,
 };
 static const struct inode_operations xfs_dir_inode_operations = {
@@ -766,12 +843,20 @@ xfs_diflags_to_iflags(
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
 */
 void
 xfs_setup_inode(
        struct xfs_inode        *ip)
 {
-        struct inode            *inode = ip->i_vnode;
+        struct inode            *inode = &ip->i_vnode;
+        inode->i_ino = ip->i_ino;
+        inode->i_state = I_NEW|I_LOCK;
+        inode_add_to_lists(ip->i_mount->m_super, inode);
        inode->i_mode   = ip->i_d.di_mode;
        inode->i_nlink  = ip->i_d.di_nlink;
@@ -799,7 +884,6 @@ xfs_setup_inode(
        inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
        inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
        xfs_diflags_to_iflags(inode, ip);
-        xfs_iflags_clear(ip, XFS_IMODIFIED);
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b1a1e31dc21..ef41c92ce66e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,7 +22,6 @@ struct xfs_inode;
 extern const struct file_operations xfs_file_operations;
 extern const struct file_operations xfs_dir_file_operations;
-extern const struct file_operations xfs_invis_file_operations;
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a9795..507492d6dccd 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -21,18 +21,12 @@
 #include <linux/types.h>
 /*
- * Some types are conditional depending on the target system.
 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- * as requiring XFS_BIG_BLKNOS to be set.
 */
 #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
 # define XFS_BIG_BLKNOS 1
-# if BITS_PER_LONG == 64
+# define XFS_BIG_INUMS  1
-#  define XFS_BIG_INUMS 1
-# else
-#  define XFS_BIG_INUMS 0
-# endif
 #else
 # define XFS_BIG_BLKNOS 0
 # define XFS_BIG_INUMS  0
@@ -77,6 +71,7 @@
 #include <linux/spinlock.h>
 #include <linux/random.h>
 #include <linux/ctype.h>
+#include <linux/writeback.h>
 #include <asm/page.h>
 #include <asm/div64.h>
@@ -85,7 +80,6 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
-#include <xfs_vfs.h>
 #include <xfs_cred.h>
 #include <xfs_vnode.h>
 #include <xfs_stats.h>
@@ -107,7 +101,6 @@
 #undef  HAVE_PERCPU_SB  /* per cpu superblock counters are a 2.6 feature */
 #endif
-#define restricted_chown        xfs_params.restrict_chown.val
 #define irix_sgid_inherit       xfs_params.sgid_inherit.val
 #define irix_symlink_mode       xfs_params.symlink_mode.val
 #define xfs_panic_mask          xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1957e5357d04..7e90daa0d1d1 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,7 +51,6 @@
 #include "xfs_vnodeops.h"
 #include <linux/capability.h>
-#include <linux/mount.h>
 #include <linux/writeback.h>
@@ -243,7 +242,7 @@ xfs_read(
        if (unlikely(ioflags & IO_ISDIRECT)) {
                if (inode->i_mapping->nrpages)
-                        ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
+                        ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
                                                    -1, FI_REMAPF_LOCKED);
                mutex_unlock(&inode->i_mutex);
                if (ret) {
@@ -668,15 +667,8 @@ start:
        if (new_size > xip->i_size)
                xip->i_new_size = new_size;
-        /*
+        if (likely(!(ioflags & IO_INVIS)))
-         * We're not supposed to change timestamps in readonly-mounted
-         * filesystems.  Throw it away if anyone asks us.
-         */
-        if (likely(!(ioflags & IO_INVIS) &&
-                   !mnt_want_write(file->f_path.mnt))) {
                xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-                mnt_drop_write(file->f_path.mnt);
-        }
        /*
         * If the offset is beyond the size of the file, we have a couple
@@ -715,7 +707,6 @@ start:
                }
        }
-retry:
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
@@ -771,6 +762,17 @@ retry:
        if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
                ret = wait_on_sync_kiocb(iocb);
+        isize = i_size_read(inode);
+        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+                *offset = isize;
+        if (*offset > xip->i_size) {
+                xfs_ilock(xip, XFS_ILOCK_EXCL);
+                if (*offset > xip->i_size)
+                        xip->i_size = *offset;
+                xfs_iunlock(xip, XFS_ILOCK_EXCL);
+        }
        if (ret == -ENOSPC &&
            DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
                xfs_iunlock(xip, iolock);
@@ -784,20 +786,7 @@ retry:
                xfs_ilock(xip, iolock);
                if (error)
                        goto out_unlock_internal;
-                pos = xip->i_size;
+                goto start;
-                ret = 0;
-                goto retry;
-        }
-        isize = i_size_read(inode);
-        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
-                *offset = isize;
-        if (*offset > xip->i_size) {
-                xfs_ilock(xip, XFS_ILOCK_EXCL);
-                if (*offset > xip->i_size)
-                        xip->i_size = *offset;
-                xfs_iunlock(xip, XFS_ILOCK_EXCL);
        }
        error = -ret;
@@ -855,13 +844,7 @@ retry:
 int
 xfs_bdstrat_cb(struct xfs_buf *bp)
 {
-        xfs_mount_t     *mp;
+        if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
-        mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
-        if (!XFS_FORCED_SHUTDOWN(mp)) {
-                xfs_buf_iorequest(bp);
-                return 0;
-        } else {
                xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
                /*
                 * Metadata write that didn't get logged but
@@ -874,6 +857,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
                else
                        return (xfs_bioerror(bp));
        }
+        xfs_buf_iorequest(bp);
+        return 0;
 }
 /*
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c7..c3526d445f6a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,11 +53,15 @@ xfs_read_xfsstats(
                { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
                { "vnodes",             XFSSTAT_END_VNODE_OPS           },
                { "buf",                XFSSTAT_END_BUF                 },
+                { "abtb2",              XFSSTAT_END_ABTB_V2             },
+                { "abtc2",              XFSSTAT_END_ABTC_V2             },
+                { "bmbt2",              XFSSTAT_END_BMBT_V2             },
+                { "ibt2",               XFSSTAT_END_IBT_V2              },
        };
        /* Loop over all stats groups */
        for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
-                len += sprintf(buffer + len, xstats[i].desc);
+                len += sprintf(buffer + len, "%s", xstats[i].desc);
                /* inner loop does each group */
                while (j < xstats[i].endpoint) {
                        val = 0;
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9f..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
        __uint32_t              xb_page_retries;
        __uint32_t              xb_page_found;
        __uint32_t              xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2             (XFSSTAT_END_BUF+15)
+        __uint32_t              xs_abtb_2_lookup;
+        __uint32_t              xs_abtb_2_compare;
+        __uint32_t              xs_abtb_2_insrec;
+        __uint32_t              xs_abtb_2_delrec;
+        __uint32_t              xs_abtb_2_newroot;
+        __uint32_t              xs_abtb_2_killroot;
+        __uint32_t              xs_abtb_2_increment;
+        __uint32_t              xs_abtb_2_decrement;
+        __uint32_t              xs_abtb_2_lshift;
+        __uint32_t              xs_abtb_2_rshift;
+        __uint32_t              xs_abtb_2_split;
+        __uint32_t              xs_abtb_2_join;
+        __uint32_t              xs_abtb_2_alloc;
+        __uint32_t              xs_abtb_2_free;
+        __uint32_t              xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2             (XFSSTAT_END_ABTB_V2+15)
+        __uint32_t              xs_abtc_2_lookup;
+        __uint32_t              xs_abtc_2_compare;
+        __uint32_t              xs_abtc_2_insrec;
+        __uint32_t              xs_abtc_2_delrec;
+        __uint32_t              xs_abtc_2_newroot;
+        __uint32_t              xs_abtc_2_killroot;
+        __uint32_t              xs_abtc_2_increment;
+        __uint32_t              xs_abtc_2_decrement;
+        __uint32_t              xs_abtc_2_lshift;
+        __uint32_t              xs_abtc_2_rshift;
+        __uint32_t              xs_abtc_2_split;
+        __uint32_t              xs_abtc_2_join;
+        __uint32_t              xs_abtc_2_alloc;
+        __uint32_t              xs_abtc_2_free;
+        __uint32_t              xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2             (XFSSTAT_END_ABTC_V2+15)
+        __uint32_t              xs_bmbt_2_lookup;
+        __uint32_t              xs_bmbt_2_compare;
+        __uint32_t              xs_bmbt_2_insrec;
+        __uint32_t              xs_bmbt_2_delrec;
+        __uint32_t              xs_bmbt_2_newroot;
+        __uint32_t              xs_bmbt_2_killroot;
+        __uint32_t              xs_bmbt_2_increment;
+        __uint32_t              xs_bmbt_2_decrement;
+        __uint32_t              xs_bmbt_2_lshift;
+        __uint32_t              xs_bmbt_2_rshift;
+        __uint32_t              xs_bmbt_2_split;
+        __uint32_t              xs_bmbt_2_join;
+        __uint32_t              xs_bmbt_2_alloc;
+        __uint32_t              xs_bmbt_2_free;
+        __uint32_t              xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2              (XFSSTAT_END_BMBT_V2+15)
+        __uint32_t              xs_ibt_2_lookup;
+        __uint32_t              xs_ibt_2_compare;
+        __uint32_t              xs_ibt_2_insrec;
+        __uint32_t              xs_ibt_2_delrec;
+        __uint32_t              xs_ibt_2_newroot;
+        __uint32_t              xs_ibt_2_killroot;
+        __uint32_t              xs_ibt_2_increment;
+        __uint32_t              xs_ibt_2_decrement;
+        __uint32_t              xs_ibt_2_lshift;
+        __uint32_t              xs_ibt_2_rshift;
+        __uint32_t              xs_ibt_2_split;
+        __uint32_t              xs_ibt_2_join;
+        __uint32_t              xs_ibt_2_alloc;
+        __uint32_t              xs_ibt_2_free;
+        __uint32_t              xs_ibt_2_moves;
 /* Extra precision counters */
        __uint64_t              xs_xstrat_bytes;
        __uint64_t              xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056eb..36f6cc703ef2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
 #include "xfs.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_clnt.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -36,6 +35,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
@@ -48,7 +48,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
-#include "xfs_vfsops.h"
 #include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
@@ -58,6 +57,7 @@
 #include "xfs_extfree_item.h"
 #include "xfs_mru_cache.h"
 #include "xfs_inode_item.h"
+#include "xfs_sync.h"
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -70,36 +70,9 @@
 static struct quotactl_ops xfs_quotactl_operations;
 static struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_vnode_zone;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
-        struct super_block      *sb,
-        int                     silent)
-{
-        struct xfs_mount_args   *args;
-        args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
-        if (!args)
-                return NULL;
-        args->logbufs = args->logbufsize = -1;
-        strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-        /* Copy the already-parsed mount(2) flags we're interested in */
-        if (sb->s_flags & MS_DIRSYNC)
-                args->flags |= XFSMNT_DIRSYNC;
-        if (sb->s_flags & MS_SYNCHRONOUS)
-                args->flags |= XFSMNT_WSYNC;
-        if (silent)
-                args->flags |= XFSMNT_QUIET;
-        args->flags |= XFSMNT_32BITINODES;
-        return args;
-}
 #define MNTOPT_LOGBUFS  "logbufs"       /* number of XFS log buffers */
 #define MNTOPT_LOGBSIZE "logbsize"      /* size of XFS log buffers */
 #define MNTOPT_LOGDEV   "logdev"        /* log device */
@@ -188,26 +161,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
        return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
 }
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
 STATIC int
 xfs_parseargs(
        struct xfs_mount        *mp,
        char                    *options,
-        struct xfs_mount_args   *args,
+        char                    **mtpt)
-        int                     update)
 {
+        struct super_block      *sb = mp->m_super;
        char                    *this_char, *value, *eov;
-        int                     dsunit, dswidth, vol_dsunit, vol_dswidth;
+        int                     dsunit = 0;
-        int                     iosize;
+        int                     dswidth = 0;
+        int                     iosize = 0;
        int                     dmapi_implies_ikeep = 1;
+        uchar_t                 iosizelog = 0;
+        /*
+         * Copy binary VFS mount flags we are interested in.
+         */
+        if (sb->s_flags & MS_RDONLY)
+                mp->m_flags |= XFS_MOUNT_RDONLY;
+        if (sb->s_flags & MS_DIRSYNC)
+                mp->m_flags |= XFS_MOUNT_DIRSYNC;
+        if (sb->s_flags & MS_SYNCHRONOUS)
+                mp->m_flags |= XFS_MOUNT_WSYNC;
+        /*
+         * Set some default flags that could be cleared by the mount option
+         * parsing.
+         */
+        mp->m_flags |= XFS_MOUNT_BARRIER;
+        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-        args->flags |= XFSMNT_BARRIER;
+        /*
-        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+         * These can be overridden by the mount option parsing.
+         */
+        mp->m_logbufs = -1;
+        mp->m_logbsize = -1;
        if (!options)
                goto done;
-        iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
        while ((this_char = strsep(&options, ",")) != NULL) {
                if (!*this_char)
                        continue;
@@ -221,7 +222,7 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                        args->logbufs = simple_strtoul(value, &eov, 10);
+                        mp->m_logbufs = simple_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -229,7 +230,7 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                        args->logbufsize = suffix_strtoul(value, &eov, 10);
+                        mp->m_logbsize = suffix_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -237,7 +238,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                        strncpy(args->logname, value, MAXNAMELEN);
+                        mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                        if (!mp->m_logname)
+                                return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_MTPT)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -245,7 +248,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                        strncpy(args->mtpt, value, MAXNAMELEN);
+                        *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                        if (!*mtpt)
+                                return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -253,7 +258,9 @@ xfs_parseargs(
                                        this_char);
                                return EINVAL;
                        }
-                        strncpy(args->rtname, value, MAXNAMELEN);
+                        mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                        if (!mp->m_rtname)
+                                return ENOMEM;
                } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -262,8 +269,7 @@ xfs_parseargs(
                                return EINVAL;
                        }
                        iosize = simple_strtoul(value, &eov, 10);
-                        args->flags |= XFSMNT_IOSIZE;
+                        iosizelog = ffs(iosize) - 1;
-                        args->iosizelog = (uint8_t) iosize;
                } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -272,8 +278,7 @@ xfs_parseargs(
                                return EINVAL;
                        }
                        iosize = suffix_strtoul(value, &eov, 10);
-                        args->flags |= XFSMNT_IOSIZE;
+                        iosizelog = ffs(iosize) - 1;
-                        args->iosizelog = ffs(iosize) - 1;
                } else if (!strcmp(this_char, MNTOPT_GRPID) ||
                           !strcmp(this_char, MNTOPT_BSDGROUPS)) {
                        mp->m_flags |= XFS_MOUNT_GRPID;
@@ -281,23 +286,25 @@ xfs_parseargs(
                           !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
                        mp->m_flags &= ~XFS_MOUNT_GRPID;
                } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-                        args->flags |= XFSMNT_WSYNC;
+                        mp->m_flags |= XFS_MOUNT_WSYNC;
                } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
-                        args->flags |= XFSMNT_OSYNCISOSYNC;
+                        mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
                } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-                        args->flags |= XFSMNT_NORECOVERY;
+                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
                } else if (!strcmp(this_char, MNTOPT_INO64)) {
-                        args->flags |= XFSMNT_INO64;
+#if XFS_BIG_INUMS
-#if !XFS_BIG_INUMS
+                        mp->m_flags |= XFS_MOUNT_INO64;
+                        mp->m_inoadd = XFS_INO64_OFFSET;
+#else
                        cmn_err(CE_WARN,
                                "XFS: %s option not allowed on this system",
                                this_char);
                        return EINVAL;
 #endif
                } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-                        args->flags |= XFSMNT_NOALIGN;
+                        mp->m_flags |= XFS_MOUNT_NOALIGN;
                } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-                        args->flags |= XFSMNT_SWALLOC;
+                        mp->m_flags |= XFS_MOUNT_SWALLOC;
                } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
                        if (!value || !*value) {
                                cmn_err(CE_WARN,
@@ -315,7 +322,7 @@ xfs_parseargs(
                        }
                        dswidth = simple_strtoul(value, &eov, 10);
                } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-                        args->flags &= ~XFSMNT_32BITINODES;
+                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
                        cmn_err(CE_WARN,
                                "XFS: %s option not allowed on this system",
@@ -323,56 +330,61 @@ xfs_parseargs(
                        return EINVAL;
 #endif
                } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-                        args->flags |= XFSMNT_NOUUID;
+                        mp->m_flags |= XFS_MOUNT_NOUUID;
                } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-                        args->flags |= XFSMNT_BARRIER;
+                        mp->m_flags |= XFS_MOUNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-                        args->flags &= ~XFSMNT_BARRIER;
+                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-                        args->flags |= XFSMNT_IKEEP;
+                        mp->m_flags |= XFS_MOUNT_IKEEP;
                } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
                        dmapi_implies_ikeep = 0;
-                        args->flags &= ~XFSMNT_IKEEP;
+                        mp->m_flags &= ~XFS_MOUNT_IKEEP;
                } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-                        args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+                        mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-                        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+                        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                        args->flags |= XFSMNT_ATTR2;
+                        mp->m_flags |= XFS_MOUNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                        args->flags &= ~XFSMNT_ATTR2;
+                        mp->m_flags &= ~XFS_MOUNT_ATTR2;
-                        args->flags |= XFSMNT_NOATTR2;
+                        mp->m_flags |= XFS_MOUNT_NOATTR2;
                } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-                        args->flags2 |= XFSMNT2_FILESTREAMS;
+                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
                } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                        args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
+                        mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                        args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+                                          XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                          XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                          XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
                           !strcmp(this_char, MNTOPT_UQUOTA) ||
                           !strcmp(this_char, MNTOPT_USRQUOTA)) {
-                        args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                         XFS_UQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
                           !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-                        args->flags |= XFSMNT_UQUOTA;
+                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                        args->flags &= ~XFSMNT_UQUOTAENF;
+                        mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
                           !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-                        args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                         XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-                        args->flags |= XFSMNT_PQUOTA;
+                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                        args->flags &= ~XFSMNT_PQUOTAENF;
+                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
                           !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-                        args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                         XFS_OQUOTA_ENFD);
                } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-                        args->flags |= XFSMNT_GQUOTA;
+                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                        args->flags &= ~XFSMNT_GQUOTAENF;
+                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
-                        args->flags |= XFSMNT_DMAPI;
+                        mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, MNTOPT_XDSM)) {
-                        args->flags |= XFSMNT_DMAPI;
+                        mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, MNTOPT_DMI)) {
-                        args->flags |= XFSMNT_DMAPI;
+                        mp->m_flags |= XFS_MOUNT_DMAPI;
                } else if (!strcmp(this_char, "ihashsize")) {
                        cmn_err(CE_WARN,
        "XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +402,29 @@ xfs_parseargs(
                }
        }
-        if (args->flags & XFSMNT_NORECOVERY) {
+        /*
-                if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
+         * no recovery flag requires a read-only mount
-                        cmn_err(CE_WARN,
+         */
-                                "XFS: no-recovery mounts must be read-only.");
+        if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-                        return EINVAL;
+            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                }
+                cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
+                return EINVAL;
        }
-        if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+        if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
                cmn_err(CE_WARN,
        "XFS: sunit and swidth options incompatible with the noalign option");
                return EINVAL;
        }
-        if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+        if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+            (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
                cmn_err(CE_WARN,
                        "XFS: cannot mount with both project and group quota");
                return EINVAL;
        }
-        if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+        if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
                printk("XFS: %s option needs the mount point option as well\n",
                        MNTOPT_DMAPI);
                return EINVAL;
@@ -438,27 +452,66 @@ xfs_parseargs(
         * Note that if "ikeep" or "noikeep" mount options are
         * supplied, then they are honored.
         */
-        if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
+        if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
-                args->flags |= XFSMNT_IKEEP;
+                mp->m_flags |= XFS_MOUNT_IKEEP;
-        if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+done:
+        if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+                /*
+                 * At this point the superblock has not been read
+                 * in, therefore we do not know the block size.
+                 * Before the mount call ends we will convert
+                 * these to FSBs.
+                 */
                if (dsunit) {
-                        args->sunit = dsunit;
+                        mp->m_dalign = dsunit;
-                        args->flags |= XFSMNT_RETERR;
+                        mp->m_flags |= XFS_MOUNT_RETERR;
-                } else {
-                        args->sunit = vol_dsunit;
                }
-                dswidth ? (args->swidth = dswidth) :
-                          (args->swidth = vol_dswidth);
+                if (dswidth)
-        } else {
+                        mp->m_swidth = dswidth;
-                args->sunit = args->swidth = 0;
+        }
+        if (mp->m_logbufs != -1 &&
+            mp->m_logbufs != 0 &&
+            (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+             mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+                cmn_err(CE_WARN,
+                        "XFS: invalid logbufs value: %d [not %d-%d]",
+                        mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+                return XFS_ERROR(EINVAL);
+        }
+        if (mp->m_logbsize != -1 &&
+            mp->m_logbsize !=  0 &&
+            (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+             mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+             !is_power_of_2(mp->m_logbsize))) {
+                cmn_err(CE_WARN,
+        "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                        mp->m_logbsize);
+                return XFS_ERROR(EINVAL);
+        }
+        mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+        if (!mp->m_fsname)
+                return ENOMEM;
+        mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+        if (iosizelog) {
+                if (iosizelog > XFS_MAX_IO_LOG ||
+                    iosizelog < XFS_MIN_IO_LOG) {
+                        cmn_err(CE_WARN,
+                "XFS: invalid log iosize: %d [not %d-%d]",
+                                iosizelog, XFS_MIN_IO_LOG,
+                                XFS_MAX_IO_LOG);
+                        return XFS_ERROR(EINVAL);
+                }
+                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+                mp->m_readio_log = iosizelog;
+                mp->m_writeio_log = iosizelog;
        }
-done:
-        if (args->flags & XFSMNT_32BITINODES)
-                mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-        if (args->flags2)
-                args->flags |= XFSMNT_FLAGS2;
        return 0;
 }
@@ -704,8 +757,7 @@ xfs_close_devices(
 */
 STATIC int
 xfs_open_devices(
-        struct xfs_mount        *mp,
+        struct xfs_mount        *mp)
-        struct xfs_mount_args   *args)
 {
        struct block_device     *ddev = mp->m_super->s_bdev;
        struct block_device     *logdev = NULL, *rtdev = NULL;
@@ -714,14 +766,14 @@ xfs_open_devices(
        /*
         * Open real time and log devices - order is important.
         */
-        if (args->logname[0]) {
+        if (mp->m_logname) {
-                error = xfs_blkdev_get(mp, args->logname, &logdev);
+                error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
                if (error)
                        goto out;
        }
-        if (args->rtname[0]) {
+        if (mp->m_rtname) {
-                error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+                error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
                if (error)
                        goto out_close_logdev;
@@ -813,18 +865,18 @@ xfs_setup_devices(
 */
 void
 xfsaild_wakeup(
-        xfs_mount_t             *mp,
+        struct xfs_ail          *ailp,
        xfs_lsn_t               threshold_lsn)
 {
-        mp->m_ail.xa_target = threshold_lsn;
+        ailp->xa_target = threshold_lsn;
-        wake_up_process(mp->m_ail.xa_task);
+        wake_up_process(ailp->xa_task);
 }
 int
 xfsaild(
        void    *data)
 {
-        xfs_mount_t     *mp = (xfs_mount_t *)data;
+        struct xfs_ail  *ailp = data;
        xfs_lsn_t       last_pushed_lsn = 0;
        long            tout = 0;
@@ -836,11 +888,11 @@ xfsaild(
                /* swsusp */
                try_to_freeze();
-                ASSERT(mp->m_log);
+                ASSERT(ailp->xa_mount->m_log);
-                if (XFS_FORCED_SHUTDOWN(mp))
+                if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
                        continue;
-                tout = xfsaild_push(mp, &last_pushed_lsn);
+                tout = xfsaild_push(ailp, &last_pushed_lsn);
        }
        return 0;
@@ -848,43 +900,82 @@ xfsaild(
 int
 xfsaild_start(
-        xfs_mount_t     *mp)
+        struct xfs_ail  *ailp)
 {
-        mp->m_ail.xa_target = 0;
+        ailp->xa_target = 0;
-        mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
+        ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
-        if (IS_ERR(mp->m_ail.xa_task))
+        if (IS_ERR(ailp->xa_task))
-                return -PTR_ERR(mp->m_ail.xa_task);
+                return -PTR_ERR(ailp->xa_task);
        return 0;
 }
 void
 xfsaild_stop(
-        xfs_mount_t     *mp)
+        struct xfs_ail  *ailp)
 {
-        kthread_stop(mp->m_ail.xa_task);
+        kthread_stop(ailp->xa_task);
 }
+/* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
        struct super_block      *sb)
 {
-        return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
+        BUG();
+        return NULL;
 }
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
 STATIC void
 xfs_fs_destroy_inode(
-        struct inode            *inode)
+        struct inode    *inode)
 {
-        kmem_zone_free(xfs_vnode_zone, inode);
+        xfs_inode_t             *ip = XFS_I(inode);
+        XFS_STATS_INC(vn_reclaim);
+        if (xfs_reclaim(ip))
+                panic("%s: cannot reclaim 0x%p\n", __func__, inode);
 }
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that are left in the initialised state
+ * when freeing the inode.
+ */
 STATIC void
 xfs_fs_inode_init_once(
-        void                    *vnode)
+        void                    *inode)
 {
-        inode_init_once((struct inode *)vnode);
+        struct xfs_inode        *ip = inode;
+        memset(ip, 0, sizeof(struct xfs_inode));
+        /* vfs inode */
+        inode_init_once(VFS_I(ip));
+        /* xfs inode */
+        atomic_set(&ip->i_iocount, 0);
+        atomic_set(&ip->i_pincount, 0);
+        spin_lock_init(&ip->i_flags_lock);
+        init_waitqueue_head(&ip->i_ipin_wait);
+        /*
+         * Because we want to use a counting completion, complete
+         * the flush completion once to allow a single access to
+         * the flush completion without blocking.
+         */
+        init_completion(&ip->i_flush);
+        complete(&ip->i_flush);
+        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                     "xfsino", ip->i_ino);
+        mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 }
 /*
@@ -898,21 +989,26 @@ xfs_fs_write_inode(
        struct inode            *inode,
        int                     sync)
 {
+        struct xfs_inode        *ip = XFS_I(inode);
        int                     error = 0;
        int                     flags = 0;
-        xfs_itrace_entry(XFS_I(inode));
+        xfs_itrace_entry(ip);
        if (sync) {
-                filemap_fdatawait(inode->i_mapping);
+                error = xfs_wait_on_pages(ip, 0, -1);
+                if (error)
+                        goto out_error;
                flags |= FLUSH_SYNC;
        }
-        error = xfs_inode_flush(XFS_I(inode), flags);
+        error = xfs_inode_flush(ip, flags);
+out_error:
        /*
         * if we failed to write out the inode then mark
         * it dirty again so we'll try again later.
         */
        if (error)
-                mark_inode_dirty_sync(inode);
+                xfs_mark_inode_dirty_sync(ip);
        return -error;
 }
@@ -923,164 +1019,12 @@ xfs_fs_clear_inode(
 {
        xfs_inode_t             *ip = XFS_I(inode);
-        /*
+        xfs_itrace_entry(ip);
-         * ip can be null when xfs_iget_core calls xfs_idestroy if we
+        XFS_STATS_INC(vn_rele);
-         * find an inode with di_mode == 0 but without IGET_CREATE set.
+        XFS_STATS_INC(vn_remove);
-         */
+        XFS_STATS_DEC(vn_active);
-        if (ip) {
-                xfs_itrace_entry(ip);
-                XFS_STATS_INC(vn_rele);
-                XFS_STATS_INC(vn_remove);
-                XFS_STATS_INC(vn_reclaim);
-                XFS_STATS_DEC(vn_active);
-                xfs_inactive(ip);
-                xfs_iflags_clear(ip, XFS_IMODIFIED);
-                if (xfs_reclaim(ip))
-                        panic("%s: cannot reclaim 0x%p\n", __func__, inode);
-        }
-        ASSERT(XFS_I(inode) == NULL);
-}
-/*
+        xfs_inactive(ip);
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-        struct xfs_mount *mp,
-        void            *data,
-        void            (*syncer)(struct xfs_mount *, void *))
-{
-        struct bhv_vfs_sync_work *work;
-        work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
-        INIT_LIST_HEAD(&work->w_list);
-        work->w_syncer = syncer;
-        work->w_data = data;
-        work->w_mount = mp;
-        spin_lock(&mp->m_sync_lock);
-        list_add_tail(&work->w_list, &mp->m_sync_list);
-        spin_unlock(&mp->m_sync_lock);
-        wake_up_process(mp->m_sync_task);
-}
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
-        struct xfs_mount *mp,
-        void            *arg)
-{
-        struct inode    *inode = arg;
-        filemap_flush(inode->i_mapping);
-        iput(inode);
-}
-void
-xfs_flush_inode(
-        xfs_inode_t     *ip)
-{
-        struct inode    *inode = VFS_I(ip);
-        igrab(inode);
-        xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-        delay(msecs_to_jiffies(500));
-}
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
-        struct xfs_mount *mp,
-        void            *arg)
-{
-        struct inode    *inode = arg;
-        sync_blockdev(mp->m_super->s_bdev);
-        iput(inode);
-}
-void
-xfs_flush_device(
-        xfs_inode_t     *ip)
-{
-        struct inode    *inode = VFS_I(ip);
-        igrab(inode);
-        xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
-        delay(msecs_to_jiffies(500));
-        xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-STATIC void
-xfs_sync_worker(
-        struct xfs_mount *mp,
-        void            *unused)
-{
-        int             error;
-        if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-                error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
-        mp->m_sync_seq++;
-        wake_up(&mp->m_wait_single_sync_task);
-}
-STATIC int
-xfssyncd(
-        void                    *arg)
-{
-        struct xfs_mount        *mp = arg;
-        long                    timeleft;
-        bhv_vfs_sync_work_t     *work, *n;
-        LIST_HEAD               (tmp);
-        set_freezable();
-        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-        for (;;) {
-                timeleft = schedule_timeout_interruptible(timeleft);
-                /* swsusp */
-                try_to_freeze();
-                if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                        break;
-                spin_lock(&mp->m_sync_lock);
-                /*
-                 * We can get woken by laptop mode, to do a sync -
-                 * that's the (only!) case where the list would be
-                 * empty with time remaining.
-                 */
-                if (!timeleft || list_empty(&mp->m_sync_list)) {
-                        if (!timeleft)
-                                timeleft = xfs_syncd_centisecs *
-                                                        msecs_to_jiffies(10);
-                        INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-                        list_add_tail(&mp->m_sync_work.w_list,
-                                        &mp->m_sync_list);
-                }
-                list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
-                        list_move(&work->w_list, &tmp);
-                spin_unlock(&mp->m_sync_lock);
-                list_for_each_entry_safe(work, n, &tmp, w_list) {
-                        (*work->w_syncer)(mp, work->w_data);
-                        list_del(&work->w_list);
-                        if (work == &mp->m_sync_work)
-                                continue;
-                        kmem_free(work);
-                }
-        }
-        return 0;
 }
 STATIC void
@@ -1099,11 +1043,9 @@ xfs_fs_put_super(
        struct xfs_mount        *mp = XFS_M(sb);
        struct xfs_inode        *rip = mp->m_rootip;
        int                     unmount_event_flags = 0;
-        int                     error;
-        kthread_stop(mp->m_sync_task);
+        xfs_syncd_stop(mp);
+        xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
-        xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
 #ifdef HAVE_DMAPI
        if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1128,18 +1070,6 @@ xfs_fs_put_super(
        xfs_filestream_unmount(mp);
        XFS_bflush(mp->m_ddev_targp);
-        error = xfs_unmount_flush(mp, 0);
-        WARN_ON(error);
-        /*
-         * If we're forcing a shutdown, typically because of a media error,
-         * we want to make sure we invalidate dirty pages that belong to
-         * referenced vnodes as well.
-         */
-        if (XFS_FORCED_SHUTDOWN(mp)) {
-                error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
-                ASSERT(error != EFSCORRUPTED);
-        }
        if (mp->m_flags & XFS_MOUNT_DMAPI) {
                XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
        struct super_block      *sb)
 {
        if (!(sb->s_flags & MS_RDONLY))
-                xfs_sync(XFS_M(sb), SYNC_FSDATA);
+                xfs_sync_fsdata(XFS_M(sb), 0);
        sb->s_dirt = 0;
 }
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
 {
        struct xfs_mount        *mp = XFS_M(sb);
        int                     error;
-        int                     flags;
        /*
         * Treat a sync operation like a freeze.  This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
         * dirty the Linux inode until after the transaction I/O
         * completes.
         */
-        if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
+        if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
-                /*
+                error = xfs_quiesce_data(mp);
-                 * First stage of freeze - no more writers will make progress
+        else
-                 * now we are here, so we flush delwri and delalloc buffers
+                error = xfs_sync_fsdata(mp, 0);
-                 * here, then wait for all I/O to complete.  Data is frozen at
-                 * that point. Metadata is not frozen, transactions can still
-                 * occur here so don't bother flushing the buftarg (i.e
-                 * SYNC_QUIESCE) because it'll just get dirty again.
-                 */
-                flags = SYNC_DATA_QUIESCE;
-        } else
-                flags = SYNC_FSDATA;
-        error = xfs_sync(mp, flags);
        sb->s_dirt = 0;
        if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-                xfs_filestream_flush(mp);
+                xfs_quiesce_data(mp);
-                xfs_sync(mp, SYNC_DATA_QUIESCE);
+                xfs_quiesce_attr(mp);
-                xfs_attr_quiesce(mp);
                mp->m_flags |= XFS_MOUNT_RDONLY;
        }
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
 /*
 * Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
+ * need to take care of the metadata. Once that's done write a dummy
 * record to dirty the log in case of a crash while frozen.
 */
 STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
 {
        struct xfs_mount        *mp = XFS_M(sb);
-        xfs_attr_quiesce(mp);
+        xfs_quiesce_attr(mp);
        xfs_fs_log_dummy(mp);
 }
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
 /*
 * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
-        struct xfs_mount_args   *ap,
-        struct xfs_mount        *mp)
-{
-        int                     error;
-        /* Values are in BBs */
-        if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-                /*
-                 * At this point the superblock has not been read
-                 * in, therefore we do not know the block size.
-                 * Before the mount call ends we will convert
-                 * these to FSBs.
-                 */
-                mp->m_dalign = ap->sunit;
-                mp->m_swidth = ap->swidth;
-        }
-        if (ap->logbufs != -1 &&
-            ap->logbufs != 0 &&
-            (ap->logbufs < XLOG_MIN_ICLOGS ||
-             ap->logbufs > XLOG_MAX_ICLOGS)) {
-                cmn_err(CE_WARN,
-                        "XFS: invalid logbufs value: %d [not %d-%d]",
-                        ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-                return XFS_ERROR(EINVAL);
-        }
-        mp->m_logbufs = ap->logbufs;
-        if (ap->logbufsize != -1 &&
-            ap->logbufsize !=  0 &&
-            (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
-             ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
-             !is_power_of_2(ap->logbufsize))) {
-                cmn_err(CE_WARN,
-        "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                        ap->logbufsize);
-                return XFS_ERROR(EINVAL);
-        }
-        error = ENOMEM;
-        mp->m_logbsize = ap->logbufsize;
-        mp->m_fsname_len = strlen(ap->fsname) + 1;
-        mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
-        if (!mp->m_fsname)
-                goto out;
-        if (ap->rtname[0]) {
-                mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
-                if (!mp->m_rtname)
-                        goto out_free_fsname;
-        }
-        if (ap->logname[0]) {
-                mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
-                if (!mp->m_logname)
-                        goto out_free_rtname;
-        }
-        if (ap->flags & XFSMNT_WSYNC)
-                mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
-        if (ap->flags & XFSMNT_INO64) {
-                mp->m_flags |= XFS_MOUNT_INO64;
-                mp->m_inoadd = XFS_INO64_OFFSET;
-        }
-#endif
-        if (ap->flags & XFSMNT_RETERR)
-                mp->m_flags |= XFS_MOUNT_RETERR;
-        if (ap->flags & XFSMNT_NOALIGN)
-                mp->m_flags |= XFS_MOUNT_NOALIGN;
-        if (ap->flags & XFSMNT_SWALLOC)
-                mp->m_flags |= XFS_MOUNT_SWALLOC;
-        if (ap->flags & XFSMNT_OSYNCISOSYNC)
-                mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-        if (ap->flags & XFSMNT_32BITINODES)
-                mp->m_flags |= XFS_MOUNT_32BITINODES;
-        if (ap->flags & XFSMNT_IOSIZE) {
-                if (ap->iosizelog > XFS_MAX_IO_LOG ||
-                    ap->iosizelog < XFS_MIN_IO_LOG) {
-                        cmn_err(CE_WARN,
-                "XFS: invalid log iosize: %d [not %d-%d]",
-                                ap->iosizelog, XFS_MIN_IO_LOG,
-                                XFS_MAX_IO_LOG);
-                        return XFS_ERROR(EINVAL);
-                }
-                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-                mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-        }
-        if (ap->flags & XFSMNT_IKEEP)
-                mp->m_flags |= XFS_MOUNT_IKEEP;
-        if (ap->flags & XFSMNT_DIRSYNC)
-                mp->m_flags |= XFS_MOUNT_DIRSYNC;
-        if (ap->flags & XFSMNT_ATTR2)
-                mp->m_flags |= XFS_MOUNT_ATTR2;
-        if (ap->flags & XFSMNT_NOATTR2)
-                mp->m_flags |= XFS_MOUNT_NOATTR2;
-        if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
-                mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-        /*
-         * no recovery flag requires a read-only mount
-         */
-        if (ap->flags & XFSMNT_NORECOVERY) {
-                if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                        cmn_err(CE_WARN,
-        "XFS: tried to mount a FS read-write without recovery!");
-                        return XFS_ERROR(EINVAL);
-                }
-                mp->m_flags |= XFS_MOUNT_NORECOVERY;
-        }
-        if (ap->flags & XFSMNT_NOUUID)
-                mp->m_flags |= XFS_MOUNT_NOUUID;
-        if (ap->flags & XFSMNT_BARRIER)
-                mp->m_flags |= XFS_MOUNT_BARRIER;
-        else
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-        if (ap->flags2 & XFSMNT2_FILESTREAMS)
-                mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-        if (ap->flags & XFSMNT_DMAPI)
-                mp->m_flags |= XFS_MOUNT_DMAPI;
-        return 0;
- out_free_rtname:
-        kfree(mp->m_rtname);
- out_free_fsname:
-        kfree(mp->m_fsname);
- out:
-        return error;
-}
-/*
- * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
 STATIC int
 xfs_finish_flags(
-        struct xfs_mount_args   *ap,
        struct xfs_mount        *mp)
 {
        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
        /* Fail a mount where the logbuf is smaller then the log stripe */
        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-                if ((ap->logbufsize <= 0) &&
+                if (mp->m_logbsize <= 0 &&
-                    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+                    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
-                } else if (ap->logbufsize > 0 &&
+                } else if (mp->m_logbsize > 0 &&
-                           ap->logbufsize < mp->m_sb.sb_logsunit) {
+                           mp->m_logbsize < mp->m_sb.sb_logsunit) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size must be greater than or equal to log stripe size");
                        return XFS_ERROR(EINVAL);
                }
        } else {
                /* Fail a mount if the logbuf is larger than 32K */
-                if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+                if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size for version 1 logs must be 16K or 32K");
                        return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
         * told by noattr2 to turn it off
         */
        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-            !(ap->flags & XFSMNT_NOATTR2))
+            !(mp->m_flags & XFS_MOUNT_NOATTR2))
                mp->m_flags |= XFS_MOUNT_ATTR2;
        /*
@@ -1614,48 +1385,6 @@ xfs_finish_flags(
                return XFS_ERROR(EROFS);
        }
-        /*
-         * check for shared mount.
-         */
-        if (ap->flags & XFSMNT_SHARED) {
-                if (!xfs_sb_version_hasshared(&mp->m_sb))
-                        return XFS_ERROR(EINVAL);
-                /*
-                 * For IRIX 6.5, shared mounts must have the shared
-                 * version bit set, have the persistent readonly
-                 * field set, must be version 0 and can only be mounted
-                 * read-only.
-                 */
-                if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-                     (mp->m_sb.sb_shared_vn != 0))
-                        return XFS_ERROR(EINVAL);
-                mp->m_flags |= XFS_MOUNT_SHARED;
-                /*
-                 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
-                 */
-                if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
-                        return XFS_ERROR(EINVAL);
-        }
-        if (ap->flags & XFSMNT_UQUOTA) {
-                mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_UQUOTAENF)
-                        mp->m_qflags |= XFS_UQUOTA_ENFD;
-        }
-        if (ap->flags & XFSMNT_GQUOTA) {
-                mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_GQUOTAENF)
-                        mp->m_qflags |= XFS_OQUOTA_ENFD;
-        } else if (ap->flags & XFSMNT_PQUOTA) {
-                mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_PQUOTAENF)
-                        mp->m_qflags |= XFS_OQUOTA_ENFD;
-        }
        return 0;
 }
@@ -1667,19 +1396,14 @@ xfs_fs_fill_super(
 {
        struct inode            *root;
        struct xfs_mount        *mp = NULL;
-        struct xfs_mount_args   *args;
        int                     flags = 0, error = ENOMEM;
+        char                    *mtpt = NULL;
-        args = xfs_args_allocate(sb, silent);
-        if (!args)
-                return -ENOMEM;
        mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
        if (!mp)
-                goto out_free_args;
+                goto out;
        spin_lock_init(&mp->m_sb_lock);
-        mutex_init(&mp->m_ilock);
        mutex_init(&mp->m_growlock);
        atomic_set(&mp->m_active_trans, 0);
        INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1413,9 @@ xfs_fs_fill_super(
        mp->m_super = sb;
        sb->s_fs_info = mp;
-        if (sb->s_flags & MS_RDONLY)
+        error = xfs_parseargs(mp, (char *)data, &mtpt);
-                mp->m_flags |= XFS_MOUNT_RDONLY;
-        error = xfs_parseargs(mp, (char *)data, args, 0);
        if (error)
-                goto out_free_mp;
+                goto out_free_fsname;
        sb_min_blocksize(sb, BBSIZE);
        sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1423,28 @@ xfs_fs_fill_super(
        sb->s_qcop = &xfs_quotactl_operations;
        sb->s_op = &xfs_super_operations;
-        error = xfs_dmops_get(mp, args);
+        error = xfs_dmops_get(mp);
        if (error)
-                goto out_free_mp;
+                goto out_free_fsname;
-        error = xfs_qmops_get(mp, args);
+        error = xfs_qmops_get(mp);
        if (error)
                goto out_put_dmops;
-        if (args->flags & XFSMNT_QUIET)
+        if (silent)
                flags |= XFS_MFSI_QUIET;
-        error = xfs_open_devices(mp, args);
+        error = xfs_open_devices(mp);
        if (error)
                goto out_put_qmops;
        if (xfs_icsb_init_counters(mp))
                mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
-        /*
-         * Setup flags based on mount(2) options and then the superblock
-         */
-        error = xfs_start_flags(args, mp);
-        if (error)
-                goto out_free_fsname;
        error = xfs_readsb(mp, flags);
        if (error)
-                goto out_free_fsname;
+                goto out_destroy_counters;
-        error = xfs_finish_flags(args, mp);
+        error = xfs_finish_flags(mp);
        if (error)
                goto out_free_sb;
@@ -1747,7 +1463,7 @@ xfs_fs_fill_super(
        if (error)
                goto out_filestream_unmount;
-        XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+        XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
        sb->s_dirt = 1;
        sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1488,31 @@ xfs_fs_fill_super(
                goto fail_vnrele;
        }
-        mp->m_sync_work.w_syncer = xfs_sync_worker;
+        error = xfs_syncd_init(mp);
-        mp->m_sync_work.w_mount = mp;
+        if (error)
-        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
-        if (IS_ERR(mp->m_sync_task)) {
-                error = -PTR_ERR(mp->m_sync_task);
                goto fail_vnrele;
-        }
-        xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
+        kfree(mtpt);
-        kfree(args);
+        xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
        return 0;
 out_filestream_unmount:
        xfs_filestream_unmount(mp);
 out_free_sb:
        xfs_freesb(mp);
- out_free_fsname:
+ out_destroy_counters:
-        xfs_free_fsname(mp);
        xfs_icsb_destroy_counters(mp);
        xfs_close_devices(mp);
 out_put_qmops:
        xfs_qmops_put(mp);
 out_put_dmops:
        xfs_dmops_put(mp);
- out_free_mp:
+ out_free_fsname:
+        xfs_free_fsname(mp);
+        kfree(mtpt);
        kfree(mp);
- out_free_args:
+ out:
-        kfree(args);
        return -error;
 fail_vnrele:
@@ -1820,8 +1532,6 @@ xfs_fs_fill_super(
        xfs_filestream_unmount(mp);
        XFS_bflush(mp->m_ddev_targp);
-        error = xfs_unmount_flush(mp, 0);
-        WARN_ON(error);
        xfs_unmountfs(mp);
        goto out_free_sb;
@@ -1882,10 +1592,19 @@ xfs_alloc_trace_bufs(void)
        if (!xfs_bmap_trace_buf)
                goto out_free_alloc_trace;
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
+        xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
+                                             KM_MAYFAIL);
+        if (!xfs_allocbt_trace_buf)
+                goto out_free_bmap_trace;
+        xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_inobt_trace_buf)
+                goto out_free_allocbt_trace;
        xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
        if (!xfs_bmbt_trace_buf)
-                goto out_free_bmap_trace;
+                goto out_free_inobt_trace;
 #endif
 #ifdef XFS_ATTR_TRACE
        xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1626,12 @@ xfs_alloc_trace_bufs(void)
        ktrace_free(xfs_attr_trace_buf);
 out_free_bmbt_trace:
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
        ktrace_free(xfs_bmbt_trace_buf);
+ out_free_inobt_trace:
+        ktrace_free(xfs_inobt_trace_buf);
+ out_free_allocbt_trace:
+        ktrace_free(xfs_allocbt_trace_buf);
 out_free_bmap_trace:
 #endif
 #ifdef XFS_BMAP_TRACE
@@ -1931,8 +1654,10 @@ xfs_free_trace_bufs(void)
 #ifdef XFS_ATTR_TRACE
        ktrace_free(xfs_attr_trace_buf);
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
        ktrace_free(xfs_bmbt_trace_buf);
+        ktrace_free(xfs_inobt_trace_buf);
+        ktrace_free(xfs_allocbt_trace_buf);
 #endif
 #ifdef XFS_BMAP_TRACE
        ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1670,10 @@ xfs_free_trace_bufs(void)
 STATIC int __init
 xfs_init_zones(void)
 {
-        xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
-                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-                                        KM_ZONE_SPREAD,
-                                        xfs_fs_inode_init_once);
-        if (!xfs_vnode_zone)
-                goto out;
        xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
        if (!xfs_ioend_zone)
-                goto out_destroy_vnode_zone;
+                goto out;
        xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
                                                  xfs_ioend_zone);
@@ -1970,6 +1689,7 @@ xfs_init_zones(void)
                                                "xfs_bmap_free_item");
        if (!xfs_bmap_free_item_zone)
                goto out_destroy_log_ticket_zone;
        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
                                                "xfs_btree_cur");
        if (!xfs_btree_cur_zone)
@@ -2017,8 +1737,8 @@ xfs_init_zones(void)
        xfs_inode_zone =
                kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
-                                        KM_ZONE_SPREAD, NULL);
+                        xfs_fs_inode_init_once);
        if (!xfs_inode_zone)
                goto out_destroy_efi_zone;
@@ -2066,8 +1786,6 @@ xfs_init_zones(void)
        mempool_destroy(xfs_ioend_pool);
 out_destroy_ioend_zone:
        kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
-        kmem_zone_destroy(xfs_vnode_zone);
 out:
        return -ENOMEM;
 }
@@ -2092,7 +1810,6 @@ xfs_destroy_zones(void)
        kmem_zone_destroy(xfs_log_ticket_zone);
        mempool_destroy(xfs_ioend_pool);
        kmem_zone_destroy(xfs_ioend_zone);
-        kmem_zone_destroy(xfs_vnode_zone);
 }
@@ -2100,13 +1817,12 @@ STATIC int __init
 init_xfs_fs(void)
 {
        int                     error;
-        static char             message[] __initdata = KERN_INFO \
-                XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
-        printk(message);
+        printk(KERN_INFO XFS_VERSION_STRING " with "
+                         XFS_BUILD_OPTIONS " enabled\n");
        ktrace_init(64);
-        vn_init();
+        xfs_ioend_init();
        xfs_dir_startup();
        error = xfs_init_zones();
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f9..d5d776d4cd67 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -20,24 +20,12 @@
 #include <linux/exportfs.h>
-#ifdef CONFIG_XFS_DMAPI
-# define vfs_insertdmapi(vfs)   vfs_insertops(vfsp, &xfs_dmops)
-# define vfs_initdmapi()        dmapi_init()
-# define vfs_exitdmapi()        dmapi_uninit()
-#else
-# define vfs_insertdmapi(vfs)   do { } while (0)
-# define vfs_initdmapi()        do { } while (0)
-# define vfs_exitdmapi()        do { } while (0)
-#endif
 #ifdef CONFIG_XFS_QUOTA
-# define vfs_insertquota(vfs)   vfs_insertops(vfsp, &xfs_qmops)
 extern void xfs_qm_init(void);
 extern void xfs_qm_exit(void);
 # define vfs_initquota()        xfs_qm_init()
 # define vfs_exitquota()        xfs_qm_exit()
 #else
-# define vfs_insertquota(vfs)   do { } while (0)
 # define vfs_initquota()        do { } while (0)
 # define vfs_exitquota()        do { } while (0)
 #endif
@@ -101,9 +89,6 @@ struct block_device;
 extern __uint64_t xfs_max_file_offset(unsigned int);
-extern void xfs_flush_inode(struct xfs_inode *);
-extern void xfs_flush_device(struct xfs_inode *);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 000000000000..2ed035354c26
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_utils.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_rw.h"
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+/*
+ * Sync all the inodes in the given AG according to the
+ * direction given by the flags.
+ *
+ * The per-AG inode cache radix tree is walked one inode at a time.  Inodes
+ * that igrab() cannot take a reference on, bad inodes and XFS_INEW inodes
+ * are skipped.  With SYNC_DELWRI, an inode that VN_DIRTY() reports dirty
+ * has its pages flushed under the shared iolock, and I/O completion is
+ * waited for if SYNC_IOWAIT is also set.  With SYNC_ATTR, an inode that is
+ * not clean is flushed: synchronously if SYNC_WAIT is set, otherwise as a
+ * delayed write provided the flush lock can be taken without blocking.
+ *
+ * Returns the last error seen, or 0.  EFSCORRUPTED aborts the walk at once,
+ * and a forced shutdown ends the walk with a return value of 0.
+ */
+STATIC int
+xfs_sync_inodes_ag(
+        xfs_mount_t     *mp,
+        int             ag,
+        int             flags)
+{
+        xfs_perag_t     *pag = &mp->m_perag[ag];
+        int             nr_found;
+        uint32_t        first_index = 0;
+        int             error = 0;
+        int             last_error = 0;
+        int             fflag = XFS_B_ASYNC;
+        if (flags & SYNC_DELWRI)
+                fflag = XFS_B_DELWRI;
+        if (flags & SYNC_WAIT)
+                fflag = 0;              /* synchronous overrides all */
+        do {
+                struct inode    *inode;
+                xfs_inode_t     *ip = NULL;
+                int             lock_flags = XFS_ILOCK_SHARED;
+                /*
+                 * use a gang lookup to find the next inode in the tree
+                 * as the tree is sparse and a gang lookup walks to find
+                 * the number of objects requested.
+                 */
+                read_lock(&pag->pag_ici_lock);
+                nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                                (void**)&ip, first_index, 1);
+                if (!nr_found) {
+                        read_unlock(&pag->pag_ici_lock);
+                        break;
+                }
+                /*
+                 * Update the index for the next lookup. Catch overflows
+                 * into the next AG range which can occur if we have inodes
+                 * in the last block of the AG and we are currently
+                 * pointing to the last inode.
+                 */
+                first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+                        read_unlock(&pag->pag_ici_lock);
+                        break;
+                }
+                /* nothing to sync during shutdown */
+                if (XFS_FORCED_SHUTDOWN(mp)) {
+                        read_unlock(&pag->pag_ici_lock);
+                        return 0;
+                }
+                /*
+                 * If we can't get a reference on the inode, it must be
+                 * in reclaim. Leave it for the reclaim code to flush.
+                 */
+                inode = VFS_I(ip);
+                if (!igrab(inode)) {
+                        read_unlock(&pag->pag_ici_lock);
+                        continue;
+                }
+                read_unlock(&pag->pag_ici_lock);
+                /* avoid new or bad inodes */
+                if (is_bad_inode(inode) ||
+                    xfs_iflags_test(ip, XFS_INEW)) {
+                        IRELE(ip);
+                        continue;
+                }
+                /*
+                 * If we have to flush data or wait for I/O completion
+                 * we need to hold the iolock.
+                 */
+                if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
+                        xfs_ilock(ip, XFS_IOLOCK_SHARED);
+                        lock_flags |= XFS_IOLOCK_SHARED;
+                        error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
+                        if (flags & SYNC_IOWAIT)
+                                xfs_ioend_wait(ip);
+                }
+                xfs_ilock(ip, XFS_ILOCK_SHARED);
+                if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
+                        if (flags & SYNC_WAIT) {
+                                xfs_iflock(ip);
+                                if (!xfs_inode_clean(ip))
+                                        error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+                                else
+                                        xfs_ifunlock(ip);
+                        } else if (xfs_iflock_nowait(ip)) {
+                                if (!xfs_inode_clean(ip))
+                                        error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+                                else
+                                        xfs_ifunlock(ip);
+                        }
+                }
+                xfs_iput(ip, lock_flags);
+                if (error)
+                        last_error = error;
+                /*
+                 * bail out if the filesystem is corrupted.
+                 */
+                if (error == EFSCORRUPTED)
+                        return XFS_ERROR(error);
+        } while (nr_found);
+        return last_error;
+}
+/*
+ * Sync the cached inodes of every initialised AG of the mount.
+ *
+ * Read-only mounts are skipped entirely and report success.  The walk stops
+ * early only on EFSCORRUPTED; any other error is remembered and the
+ * remaining AGs are still processed.  When SYNC_DELWRI is requested the log
+ * is forced afterwards, synchronously if SYNC_WAIT is also set.
+ *
+ * Returns the last error reported by xfs_sync_inodes_ag(), or 0.
+ */
+int
+xfs_sync_inodes(
+        xfs_mount_t     *mp,
+        int             flags)
+{
+        int             log_flags;
+        int             result = 0;
+        int             agno;
+        if (mp->m_flags & XFS_MOUNT_RDONLY)
+                return 0;
+        log_flags = (flags & SYNC_WAIT) ?
+                        (XFS_LOG_FORCE | XFS_LOG_SYNC) : XFS_LOG_FORCE;
+        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+                int     error;
+                /* AGs whose inode cache was never set up have nothing */
+                if (!mp->m_perag[agno].pag_ici_init)
+                        continue;
+                error = xfs_sync_inodes_ag(mp, agno, flags);
+                if (!error)
+                        continue;
+                result = error;
+                if (error == EFSCORRUPTED)
+                        break;
+        }
+        if (flags & SYNC_DELWRI)
+                xfs_log_force(mp, 0, log_flags);
+        return XFS_ERROR(result);
+}
+/*
+ * Log a dummy transaction that dirties only the core of the root inode,
+ * then force the log with the given flags.
+ *
+ * Returns 0 on success, or the error from reserving log space or from
+ * committing the transaction.  The log is forced even when the commit
+ * fails.
+ */
+STATIC int
+xfs_commit_dummy_trans(
+        struct xfs_mount        *mp,
+        uint                    log_flags)
+{
+        struct xfs_inode        *ip = mp->m_rootip;
+        struct xfs_trans        *tp;
+        int                     error;
+        /*
+         * Put a dummy transaction in the log to tell recovery
+         * that all others are OK.
+         */
+        tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+        error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+        if (error) {
+                xfs_trans_cancel(tp, 0);
+                return error;
+        }
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+        /* ihold keeps the inode locked across the commit; unlock below */
+        xfs_trans_ihold(tp, ip);
+        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+        error = xfs_trans_commit(tp, 0);
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        /* force the log regardless, then hand the commit status back */
+        xfs_log_force(mp, 0, log_flags);
+        return error;
+}
+/*
+ * Write the superblock buffer back to disk.
+ *
+ * With SYNC_BDFLUSH the write is opportunistic: it is skipped (returning 0)
+ * unless the buffer can be locked without sleeping, carries a dirty log
+ * item and is not pinned.  Without SYNC_BDFLUSH the buffer is always
+ * written.  The write is synchronous when SYNC_WAIT is set and asynchronous
+ * otherwise.
+ *
+ * Returns 0 when nothing was written, else the result of xfs_bwrite().
+ */
+int
+xfs_sync_fsdata(
+        struct xfs_mount        *mp,
+        int                     flags)
+{
+        struct xfs_buf          *sbp;
+        if (!(flags & SYNC_BDFLUSH)) {
+                sbp = xfs_getsb(mp, 0);
+                /*
+                 * A pinned buffer cannot be written until the log has
+                 * been flushed, and the one who would flush it may be us,
+                 * so push on the log now instead of getting stuck in the
+                 * write.  The buffer was not locked before this point, so
+                 * it may have become pinned since any earlier log force.
+                 */
+                if (XFS_BUF_ISPINNED(sbp))
+                        xfs_log_force(mp, 0, XFS_LOG_FORCE);
+        } else {
+                struct xfs_buf_log_item *item;
+                /*
+                 * This is the xfssyncd() case: only sync the superblock
+                 * if it can be locked without sleeping and is not pinned.
+                 */
+                ASSERT(!(flags & SYNC_WAIT));
+                sbp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
+                if (!sbp)
+                        return 0;
+                item = XFS_BUF_FSPRIVATE(sbp, struct xfs_buf_log_item *);
+                if (!item || !xfs_buf_item_dirty(item) ||
+                    XFS_BUF_ISPINNED(sbp)) {
+                        xfs_buf_relse(sbp);
+                        return 0;
+                }
+        }
+        if (flags & SYNC_WAIT)
+                XFS_BUF_UNASYNC(sbp);
+        else
+                XFS_BUF_ASYNC(sbp);
+        return xfs_bwrite(mp, sbp);
+}
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete.  Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ *
+ * Returns the result of the superblock write done by xfs_sync_fsdata();
+ * the return values of the inode and quota sync passes are not checked.
+ */
+int
+xfs_quiesce_data(
+        struct xfs_mount        *mp)
+{
+        int error;
+        /* push non-blocking */
+        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
+        XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+        xfs_filestream_flush(mp);
+        /* push and block */
+        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
+        XFS_QM_DQSYNC(mp, SYNC_WAIT);
+        /* write superblock and hoover up shutdown errors */
+        error = xfs_sync_fsdata(mp, 0);
+        /* flush data-only devices */
+        if (mp->m_rtdev_targp)
+                XFS_bflush(mp->m_rtdev_targp);
+        return error;
+}
+/*
+ * Flush inodes and metadata buffers until the data device buftarg has been
+ * seen with no pinned buffers on two separate passes.
+ */
+STATIC void
+xfs_quiesce_fs(
+        struct xfs_mount        *mp)
+{
+        int     clean_passes = 0;
+        xfs_flush_buftarg(mp->m_ddev_targp, 0);
+        xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+        /*
+         * Two clean passes are required.  The first pass flushes most
+         * metadata, but doing so generates more metadata (typically
+         * directory updates), which in turn must be flushed and logged
+         * before the unmount record can be written.
+         */
+        while (clean_passes < 2) {
+                xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+                /* a non-zero pin count means this pass does not count */
+                if (xfs_flush_buftarg(mp->m_ddev_targp, 1))
+                        continue;
+                delay(50);
+                clean_passes++;
+        }
+}
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ *
+ * A failure to log the superblock counters is only warned about; the
+ * unmount record and the superblock are written regardless.
+ */
+void
+xfs_quiesce_attr(
+        struct xfs_mount        *mp)
+{
+        int     error;
+        /* wait for all modifications to complete */
+        while (atomic_read(&mp->m_active_trans) > 0)
+                delay(100);
+        /* flush inodes and push all remaining buffers out to disk */
+        xfs_quiesce_fs(mp);
+        ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+        /* Push the superblock and write an unmount record */
+        error = xfs_log_sbcount(mp, 1);
+        if (error)
+                xfs_fs_cmn_err(CE_WARN, mp,
+                                "xfs_quiesce_attr: failed to log sb changes. "
+                                "Frozen image may not be consistent.");
+        xfs_log_unmount_write(mp);
+        xfs_unmountfs_writesb(mp);
+}
+/*
+ * Hand a piece of work to the mount's xfssyncd thread.
+ *
+ * Deferring to the thread has two benefits:
+ * - the work runs on the thread's stack rather than the caller's, which
+ *   matters where stack space is tight;
+ * - it can (with care) be used to sidestep deadlocks.
+ * Flushing while allocating in a full filesystem needs both.
+ *
+ * A work item is allocated, appended to mp->m_sync_list under m_sync_lock,
+ * and the thread is woken.  @syncer is later called with @mp and @data.
+ */
+STATIC void
+xfs_syncd_queue_work(
+        struct xfs_mount *mp,
+        void            *data,
+        void            (*syncer)(struct xfs_mount *, void *))
+{
+        struct bhv_vfs_sync_work *item;
+        item = kmem_alloc(sizeof(*item), KM_SLEEP);
+        item->w_mount = mp;
+        item->w_data = data;
+        item->w_syncer = syncer;
+        INIT_LIST_HEAD(&item->w_list);
+        spin_lock(&mp->m_sync_lock);
+        list_add_tail(&item->w_list, &mp->m_sync_list);
+        spin_unlock(&mp->m_sync_lock);
+        wake_up_process(mp->m_sync_task);
+}
+/*
+ * Work callback queued by xfs_flush_inode(): start writeback of the inode's
+ * page cache in the hope of converting delayed allocations and releasing
+ * their reserved space.  It is used after an allocation attempt has failed
+ * with ENOSPC.
+ *
+ * @arg is the struct inode to flush; the reference the queuer took on it
+ * is dropped here.
+ */
+STATIC void
+xfs_flush_inode_work(
+        struct xfs_mount *mp,
+        void            *arg)
+{
+        struct inode    *vfs_inode = (struct inode *)arg;
+        struct address_space *mapping = vfs_inode->i_mapping;
+        filemap_flush(mapping);
+        iput(vfs_inode);
+}
+/*
+ * Ask xfssyncd to flush this inode's data, then sleep for 500ms to give
+ * the flush a chance to make progress before the caller carries on.
+ */
+void
+xfs_flush_inode(
+        xfs_inode_t     *ip)
+{
+        struct xfs_mount *mp = ip->i_mount;
+        struct inode    *vfs_inode = VFS_I(ip);
+        /* this reference is dropped by xfs_flush_inode_work() */
+        igrab(vfs_inode);
+        xfs_syncd_queue_work(mp, vfs_inode, xfs_flush_inode_work);
+        delay(msecs_to_jiffies(500));
+}
+/*
+ * The heavier variant of xfs_flush_inode_work(): instead of flushing one
+ * inode, sync the whole block device backing the superblock.
+ *
+ * @arg is the struct inode whose reference was taken by the queuer; it is
+ * only released here, not flushed individually.
+ */
+STATIC void
+xfs_flush_device_work(
+        struct xfs_mount *mp,
+        void            *arg)
+{
+        struct inode    *held = (struct inode *)arg;
+        struct block_device *bdev = mp->m_super->s_bdev;
+        sync_blockdev(bdev);
+        iput(held);
+}
+/*
+ * Ask xfssyncd to sync the whole block device, sleep for 500ms, and then
+ * force the log synchronously.
+ */
+void
+xfs_flush_device(
+        xfs_inode_t     *ip)
+{
+        struct xfs_mount *mp = ip->i_mount;
+        struct inode    *vfs_inode = VFS_I(ip);
+        /* this reference is dropped by xfs_flush_device_work() */
+        igrab(vfs_inode);
+        xfs_syncd_queue_work(mp, vfs_inode, xfs_flush_device_work);
+        delay(msecs_to_jiffies(500));
+        xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+/*
+ * Every sync period we need to unpin all items, reclaim inodes, sync
+ * quota and write out the superblock. We might need to cover the log
+ * to indicate it is idle.
+ *
+ * None of that is done on a read-only mount.  Either way m_sync_seq is
+ * bumped and anyone sleeping on m_wait_single_sync_task is woken once the
+ * pass is over.  The second argument is not used.
+ */
+STATIC void
+xfs_sync_worker(
+        struct xfs_mount *mp,
+        void            *unused)
+{
+        int             error;
+        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+                xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+                xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+                /* dgc: errors ignored here */
+                error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+                error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+                if (xfs_log_need_covered(mp))
+                        error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
+        }
+        mp->m_sync_seq++;
+        wake_up(&mp->m_wait_single_sync_task);
+}
+STATIC int
+xfssyncd(
+        void                    *arg)
+{
+        struct xfs_mount        *mp = arg;
+        long                    timeleft;
+        bhv_vfs_sync_work_t     *work, *n;
+        LIST_HEAD               (tmp);
+        set_freezable();
+        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);  /* centisecs -> jiffies */
+        for (;;) {
+                timeleft = schedule_timeout_interruptible(timeleft);
+                /* swsusp: let the freezer park this thread if it wants to */
+                try_to_freeze();
+                /* only exit once told to stop and no queued work is left */
+                if (kthread_should_stop() && list_empty(&mp->m_sync_list))
+                        break;
+                spin_lock(&mp->m_sync_lock);
+                /*
+                 * We can get woken by laptop mode, to do a sync -
+                 * that's the (only!) case where the list would be
+                 * empty with time remaining.
+                 *
+                 * Either way (period expired, or woken with nothing
+                 * queued) put the mount's own m_sync_work item on the
+                 * list so the periodic sync runs on this pass, and
+                 * re-arm the full period if it had run out.
+                 */
+                if (!timeleft || list_empty(&mp->m_sync_list)) {
+                        if (!timeleft)
+                                timeleft = xfs_syncd_centisecs *
+                                                        msecs_to_jiffies(10);
+                        INIT_LIST_HEAD(&mp->m_sync_work.w_list);
+                        list_add_tail(&mp->m_sync_work.w_list,
+                                        &mp->m_sync_list);
+                }
+                list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
+                        list_move(&work->w_list, &tmp); /* detach under the lock */
+                spin_unlock(&mp->m_sync_lock);
+                /* run the detached work items without holding m_sync_lock */
+                list_for_each_entry_safe(work, n, &tmp, w_list) {
+                        (*work->w_syncer)(mp, work->w_data);
+                        list_del(&work->w_list);
+                        if (work == &mp->m_sync_work)   /* embedded in the mount: never freed */
+                                continue;
+                        kmem_free(work);
+                }
+        }
+        return 0;
+}
+/*
+ * Set up the mount's periodic sync work item and start its xfssyncd
+ * thread.
+ *
+ * Returns 0 on success, or a positive errno if the thread could not be
+ * created.
+ */
+int
+xfs_syncd_init(
+        struct xfs_mount        *mp)
+{
+        /* the embedded work item must be ready before the thread runs */
+        mp->m_sync_work.w_mount = mp;
+        mp->m_sync_work.w_syncer = xfs_sync_worker;
+        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+        if (!IS_ERR(mp->m_sync_task))
+                return 0;
+        return -PTR_ERR(mp->m_sync_task);
+}
+/*
+ * Stop the mount's xfssyncd thread.  The thread leaves its loop only once
+ * it has been asked to stop and finds m_sync_list empty.
+ */
+void
+xfs_syncd_stop(
+        struct xfs_mount        *mp)
+{
+        struct task_struct      *syncd = mp->m_sync_task;
+        kthread_stop(syncd);
+}
+/*
+ * Reclaim a single inode: flush it if necessary and free it.
+ *
+ * @locked is non-zero when the caller already holds XFS_ILOCK_EXCL and the
+ * flush lock on the inode; otherwise both are taken here.  @sync_mode is
+ * passed through to xfs_iflush().
+ *
+ * Returns 1, after dropping any locks the caller passed in, when the inode
+ * is already being reclaimed or is not marked reclaimable.  Returns 0 once
+ * the inode has been handed to xfs_ireclaim().
+ */
+int
+xfs_reclaim_inode(
+        xfs_inode_t     *ip,
+        int             locked,
+        int             sync_mode)
+{
+        xfs_perag_t     *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
+        /* The hash lock here protects a thread in xfs_iget_core from
+         * racing with us on linking the inode back with a vnode.
+         * Once we have the XFS_IRECLAIM flag set it will not touch
+         * us.
+         */
+        write_lock(&pag->pag_ici_lock);
+        spin_lock(&ip->i_flags_lock);
+        if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+            !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+                spin_unlock(&ip->i_flags_lock);
+                write_unlock(&pag->pag_ici_lock);
+                /* balance xfs_get_perag() on this exit path as well */
+                xfs_put_perag(ip->i_mount, pag);
+                if (locked) {
+                        xfs_ifunlock(ip);
+                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                }
+                return 1;
+        }
+        __xfs_iflags_set(ip, XFS_IRECLAIM);
+        spin_unlock(&ip->i_flags_lock);
+        write_unlock(&pag->pag_ici_lock);
+        xfs_put_perag(ip->i_mount, pag);
+        /*
+         * If the inode is still dirty, then flush it out.  If the inode
+         * is not in the AIL, then it will be OK to flush it delwri as
+         * long as xfs_iflush() does not keep any references to the inode.
+         * We leave that decision up to xfs_iflush() since it has the
+         * knowledge of whether it's OK to simply do a delwri flush of
+         * the inode or whether we need to wait until the inode is
+         * pulled from the AIL.
+         * We get the flush lock regardless, though, just to make sure
+         * we don't free it while it is being flushed.
+         */
+        if (!locked) {
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+                xfs_iflock(ip);
+        }
+        /*
+         * In the case of a forced shutdown we rely on xfs_iflush() to
+         * wait for the inode to be unpinned before returning an error.
+         */
+        if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
+                /* synchronize with xfs_iflush_done */
+                xfs_iflock(ip);
+                xfs_ifunlock(ip);
+        }
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        xfs_ireclaim(ip);
+        return 0;
+}
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ *
+ * Setting a tag modifies the radix tree, so pag_ici_lock is taken for
+ * writing: the read lock would not exclude other updaters of the tree.
+ */
+void
+xfs_inode_set_reclaim_tag(
+        xfs_inode_t     *ip)
+{
+        xfs_mount_t     *mp = ip->i_mount;
+        xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
+        write_lock(&pag->pag_ici_lock);
+        spin_lock(&ip->i_flags_lock);
+        radix_tree_tag_set(&pag->pag_ici_root,
+                        XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+        __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+        spin_unlock(&ip->i_flags_lock);
+        write_unlock(&pag->pag_ici_lock);
+        xfs_put_perag(mp, pag);
+}
+/*
+ * Clear the reclaim tag of the inode's slot in the per-AG inode cache
+ * radix tree.  No locks are taken here; that is left to the caller.
+ */
+void
+__xfs_inode_clear_reclaim_tag(
+        xfs_mount_t     *mp,
+        xfs_perag_t     *pag,
+        xfs_inode_t     *ip)
+{
+        xfs_agino_t     agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+        radix_tree_tag_clear(&pag->pag_ici_root, agino, XFS_ICI_RECLAIM_TAG);
+}
+/*
+ * Clear the reclaim tag of an inode in its AG's inode cache radix tree.
+ *
+ * Clearing a tag modifies the radix tree, so pag_ici_lock is taken for
+ * writing: the read lock would not exclude other updaters of the tree.
+ */
+void
+xfs_inode_clear_reclaim_tag(
+        xfs_inode_t     *ip)
+{
+        xfs_mount_t     *mp = ip->i_mount;
+        xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
+        write_lock(&pag->pag_ici_lock);
+        spin_lock(&ip->i_flags_lock);
+        __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+        spin_unlock(&ip->i_flags_lock);
+        write_unlock(&pag->pag_ici_lock);
+        xfs_put_perag(mp, pag);
+}
+STATIC void
+xfs_reclaim_inodes_ag(
+        xfs_mount_t     *mp,
+        int             ag,
+        int             noblock,
+        int             mode)
+{
+        xfs_inode_t     *ip = NULL;
+        xfs_perag_t     *pag = &mp->m_perag[ag];
+        int             nr_found;
+        uint32_t        first_index;
+        int             skipped;
+restart:
+        first_index = 0;
+        skipped = 0;
+        do {
+                /*
+                 * use a gang lookup to find the next inode in the tree
+                 * as the tree is sparse and a gang lookup walks to find
+                 * the number of objects requested.
+                 *
+                 * Only entries carrying XFS_ICI_RECLAIM_TAG are returned,
+                 * one per lookup.
+                 */
+                read_lock(&pag->pag_ici_lock);
+                nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+                                        (void**)&ip, first_index, 1,
+                                        XFS_ICI_RECLAIM_TAG);
+                if (!nr_found) {
+                        read_unlock(&pag->pag_ici_lock);
+                        break;
+                }
+                /*
+                 * Update the index for the next lookup. Catch overflows
+                 * into the next AG range which can occur if we have inodes
+                 * in the last block of the AG and we are currently
+                 * pointing to the last inode.
+                 */
+                first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+                        read_unlock(&pag->pag_ici_lock);
+                        break;
+                }
+                /* ignore if already under reclaim */
+                if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
+                        read_unlock(&pag->pag_ici_lock);
+                        continue;
+                }
+                if (noblock) {  /* trylock only; skip pinned or busy inodes */
+                        if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+                                read_unlock(&pag->pag_ici_lock);
+                                continue;
+                        }
+                        if (xfs_ipincount(ip) ||
+                            !xfs_iflock_nowait(ip)) {
+                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                                read_unlock(&pag->pag_ici_lock);
+                                continue;
+                        }
+                }
+                read_unlock(&pag->pag_ici_lock);
+                /*
+                 * hmmm - this is an inode already in reclaim. Do
+                 * we even bother catching it here?
+                 *
+                 * A non-zero return means xfs_reclaim_inode() did not
+                 * reclaim the inode; count it so the AG is rescanned.
+                 * With noblock set the inode is passed in already locked.
+                 */
+                if (xfs_reclaim_inode(ip, noblock, mode))
+                        skipped++;
+        } while (nr_found);
+        if (skipped) {  /* something was left behind: pause, then rescan */
+                delay(1);
+                goto restart;
+        }
+        return;
+}
+/*
+ * Reclaim the tagged inodes of every AG whose inode cache has been
+ * initialised.  @noblock and @mode are passed through to the per-AG walk.
+ *
+ * Always returns 0.
+ */
+int
+xfs_reclaim_inodes(
+        xfs_mount_t     *mp,
+        int              noblock,
+        int             mode)
+{
+        int             agno;
+        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+                if (mp->m_perag[agno].pag_ici_init)
+                        xfs_reclaim_inodes_ag(mp, agno, noblock, mode);
+        }
+        return 0;
+}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 000000000000..5f6de1efe1f6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+struct xfs_mount;
+typedef struct bhv_vfs_sync_work {     /* one queued unit of sync work: a callback plus its arguments */
+        struct list_head        w_list;        /* list linkage; presumably the sync daemon's work queue - confirm */
+        struct xfs_mount        *w_mount;      /* mount this work item belongs to */
+        void                    *w_data;        /* syncer routine argument */
+        void                    (*w_syncer)(struct xfs_mount *, void *);       /* callback; presumably invoked as w_syncer(w_mount, w_data) - confirm */
+} bhv_vfs_sync_work_t;
+#define SYNC_ATTR               0x0001  /* sync attributes */
+#define SYNC_DELWRI             0x0002  /* look at delayed writes */
+#define SYNC_WAIT               0x0004  /* wait for i/o to complete */
+#define SYNC_BDFLUSH            0x0008  /* BDFLUSH is calling -- don't block */
+#define SYNC_IOWAIT             0x0010  /* wait for all I/O to complete */
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+int xfs_sync_inodes(struct xfs_mount *mp, int flags);  /* flags: presumably SYNC_* bits - confirm */
+int xfs_sync_fsdata(struct xfs_mount *mp, int flags);  /* flags: presumably SYNC_* bits - confirm */
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+void xfs_flush_inode(struct xfs_inode *ip);    /* NOTE(review): struct xfs_inode is not forward-declared in this header */
+void xfs_flush_device(struct xfs_inode *ip);
+int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);        /* the per-AG walker passes its noblock flag as `locked`; non-zero return is counted as skipped and retried */
+int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);   /* visits every AG with pag_ici_init set; always returns 0 */
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,        /* NOTE(review): struct xfs_perag is not forward-declared in this header */
+                                struct xfs_inode *ip);
+#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3f..916c0ffb6083 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
 static ctl_table xfs_table[] = {
        {
-                .ctl_name       = XFS_RESTRICT_CHOWN,
-                .procname       = "restrict_chown",
-                .data           = &xfs_params.restrict_chown.val,
-                .maxlen         = sizeof(int),
-                .mode           = 0644,
-                .proc_handler   = &proc_dointvec_minmax,
-                .strategy       = &sysctl_intvec,
-                .extra1         = &xfs_params.restrict_chown.min,
-                .extra2         = &xfs_params.restrict_chown.max
-        },
-        {
                .ctl_name       = XFS_SGID_INHERIT,
                .procname       = "irix_sgid_inherit",
                .data           = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c37..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
 } xfs_sysctl_val_t;
 typedef struct xfs_param {
-        xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
        xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
                                         * not a member of parent dir GID. */
        xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
 enum {
        /* XFS_REFCACHE_SIZE = 1 */
        /* XFS_REFCACHE_PURGE = 2 */
-        XFS_RESTRICT_CHOWN = 3,
+        /* XFS_RESTRICT_CHOWN = 3 */
        XFS_SGID_INHERIT = 4,
        XFS_SYMLINK_MODE = 5,
        XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
deleted file mode 100644
index 7e60c7776b1c..000000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VFS_H__
-#define __XFS_VFS_H__
-#include <linux/vfs.h>
-#include "xfs_fs.h"
-struct inode;
-struct fid;
-struct cred;
-struct seq_file;
-struct super_block;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_mount_args;
-typedef struct kstatfs  bhv_statvfs_t;
-typedef struct bhv_vfs_sync_work {
-        struct list_head        w_list;
-        struct xfs_mount        *w_mount;
-        void                    *w_data;        /* syncer routine argument */
-        void                    (*w_syncer)(struct xfs_mount *, void *);
-} bhv_vfs_sync_work_t;
-#define SYNC_ATTR               0x0001  /* sync attributes */
-#define SYNC_CLOSE              0x0002  /* close file system down */
-#define SYNC_DELWRI             0x0004  /* look at delayed writes */
-#define SYNC_WAIT               0x0008  /* wait for i/o to complete */
-#define SYNC_BDFLUSH            0x0010  /* BDFLUSH is calling -- don't block */
-#define SYNC_FSDATA             0x0020  /* flush fs data (e.g. superblocks) */
-#define SYNC_REFCACHE           0x0040  /* prune some of the nfs ref cache */
-#define SYNC_REMOUNT            0x0080  /* remount readonly, no dummy LRs */
-#define SYNC_IOWAIT             0x0100  /* wait for all I/O to complete */
-/*
- * When remounting a filesystem read-only or freezing the filesystem,
- * we have two phases to execute. This first phase is syncing the data
- * before we quiesce the fielsystem, and the second is flushing all the
- * inodes out after we've waited for all the transactions created by
- * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
- * to ensure that the inodes are written to their location on disk
- * rather than just existing in transactions in the log. This means
- * after a quiesce there is no log replay required to write the inodes
- * to disk (this is the main difference between a sync and a quiesce).
- */
-#define SYNC_DATA_QUIESCE       (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
-#define SYNC_INODE_QUIESCE      (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
-#define SHUTDOWN_META_IO_ERROR  0x0001  /* write attempt to metadata failed */
-#define SHUTDOWN_LOG_IO_ERROR   0x0002  /* write attempt to the log failed */
-#define SHUTDOWN_FORCE_UMOUNT   0x0004  /* shutdown from a forced unmount */
-#define SHUTDOWN_CORRUPT_INCORE 0x0008  /* corrupt in-memory data structures */
-#define SHUTDOWN_REMOTE_REQ     0x0010  /* shutdown came from remote cell */
-#define SHUTDOWN_DEVICE_REQ     0x0020  /* failed all paths to the device */
-#define xfs_test_for_freeze(mp)         ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l)       vfs_check_frozen((mp)->m_super, (l))
-#endif  /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
deleted file mode 100644
index b52528bbbfff..000000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-/*
- * And this gunk is needed for xfs_mount.h"
- */
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dmapi.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-/*
- * Dedicated vnode inactive/reclaim sync wait queues.
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC                  37
-#define vptosync(v)             (&vsync[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t vsync[NVSYNC];
-void __init
-vn_init(void)
-{
-        int i;
-        for (i = 0; i < NVSYNC; i++)
-                init_waitqueue_head(&vsync[i]);
-}
-void
-vn_iowait(
-        xfs_inode_t     *ip)
-{
-        wait_queue_head_t *wq = vptosync(ip);
-        wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-void
-vn_iowake(
-        xfs_inode_t     *ip)
-{
-        if (atomic_dec_and_test(&ip->i_iocount))
-                wake_up(vptosync(ip));
-}
-/*
- * Volume managers supporting multiple paths can send back ENODEV when the
- * final path disappears.  In this case continuing to fill the page cache
- * with dirty data which cannot be written out is evil, so prevent that.
- */
-void
-vn_ioerror(
-        xfs_inode_t     *ip,
-        int             error,
-        char            *f,
-        int             l)
-{
-        if (unlikely(error == -ENODEV))
-                xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
-}
-#ifdef  XFS_INODE_TRACE
-/*
- * Reference count of Linux inode if present, -1 if the xfs_inode
- * has no associated Linux inode.
- */
-static inline int xfs_icount(struct xfs_inode *ip)
-{
-        struct inode *vp = VFS_I(ip);
-        if (vp)
-                return vn_count(vp);
-        return -1;
-}
-#define KTRACE_ENTER(ip, vk, s, line, ra)                       \
-        ktrace_enter(   (ip)->i_trace,                          \
-/*  0 */                (void *)(__psint_t)(vk),                \
-/*  1 */                (void *)(s),                            \
-/*  2 */                (void *)(__psint_t) line,               \
-/*  3 */                (void *)(__psint_t)xfs_icount(ip),      \
-/*  4 */                (void *)(ra),                           \
-/*  5 */                NULL,                                   \
-/*  6 */                (void *)(__psint_t)current_cpu(),       \
-/*  7 */                (void *)(__psint_t)current_pid(),       \
-/*  8 */                (void *)__return_address,               \
-/*  9 */                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-/*
- * Vnode tracing code.
- */
-void
-_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
-{
-        KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
-}
-void
-_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
-{
-        KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
-}
-void
-xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
-        KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
-}
-void
-_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
-        KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
-}
-void
-xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
-        KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
-}
-#endif  /* XFS_INODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210ff..f65983a230d3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -18,7 +18,10 @@
 #ifndef __XFS_VNODE_H__
 #define __XFS_VNODE_H__
+#include "xfs_fs.h"
 struct file;
+struct xfs_inode;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
@@ -51,40 +54,6 @@ struct attrlist_cursor_kern;
                                           Prevent VM access to the pages until
                                           the operation completes. */
-extern void     vn_init(void);
-/*
- * Yeah, these don't take vnode anymore at all, all this should be
- * cleaned up at some point.
- */
-extern void     vn_iowait(struct xfs_inode *ip);
-extern void     vn_iowake(struct xfs_inode *ip);
-extern void     vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
-static inline int vn_count(struct inode *vp)
-{
-        return atomic_read(&vp->i_count);
-}
-#define IHOLD(ip) \
-do { \
-        ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
-        atomic_inc(&(VFS_I(ip)->i_count)); \
-        xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
-} while (0)
-#define IRELE(ip) \
-do { \
-        xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
-        iput(VFS_I(ip)); \
-} while (0)
-static inline struct inode *vn_grab(struct inode *vp)
-{
-        return igrab(vp);
-}
 /*
 * Dealing with bad inodes
 */
@@ -121,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
                                        PAGECACHE_TAG_DIRTY)
-/*
- * Tracking vnode activity.
- */
-#if defined(XFS_INODE_TRACE)
-#define INODE_TRACE_SIZE        16              /* number of trace entries */
-#define INODE_KTRACE_ENTRY      1
-#define INODE_KTRACE_EXIT       2
-#define INODE_KTRACE_HOLD       3
-#define INODE_KTRACE_REF        4
-#define INODE_KTRACE_RELE       5
-extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
-extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
-extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
-extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
-extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
-#define xfs_itrace_entry(ip)    \
-        _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
-#define xfs_itrace_exit(ip)     \
-        _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
-#define xfs_itrace_exit_tag(ip, tag)    \
-        _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
-#define xfs_itrace_ref(ip)      \
-        _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
-#else
-#define xfs_itrace_entry(a)
-#define xfs_itrace_exit(a)
-#define xfs_itrace_exit_tag(a, b)
-#define xfs_itrace_hold(a, b, c, d)
-#define xfs_itrace_ref(a)
-#define xfs_itrace_rele(a, b, c, d)
-#endif
 #endif  /* __XFS_VNODE_H__ */