aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/kmem.c134
-rw-r--r--fs/xfs/linux-2.6/kmem.h157
-rw-r--r--fs/xfs/linux-2.6/mrlock.h106
-rw-r--r--fs/xfs/linux-2.6/mutex.h53
-rw-r--r--fs/xfs/linux-2.6/sema.h67
-rw-r--r--fs/xfs/linux-2.6/spin.h56
-rw-r--r--fs/xfs/linux-2.6/sv.h89
-rw-r--r--fs/xfs/linux-2.6/time.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1275
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1980
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h591
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h50
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c205
-rw-r--r--fs/xfs/linux-2.6/xfs_export.h122
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c573
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c124
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h48
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c74
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c1336
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c163
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h34
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c680
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h374
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1082
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h116
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c132
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h166
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c912
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h138
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c174
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h114
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c330
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h223
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c455
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h666
38 files changed, 12989 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
new file mode 100644
index 000000000000..364ea8c386b1
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -0,0 +1,134 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <linux/sched.h>
34#include <linux/mm.h>
35#include <linux/vmalloc.h>
36#include <linux/highmem.h>
37#include <linux/swap.h>
38#include <linux/blkdev.h>
39
40#include "time.h"
41#include "kmem.h"
42
43#define MAX_VMALLOCS 6
44#define MAX_SLAB_SIZE 0x20000
45
46
47void *
48kmem_alloc(size_t size, int flags)
49{
50 int retries = 0;
51 int lflags = kmem_flags_convert(flags);
52 void *ptr;
53
54 do {
55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
56 ptr = kmalloc(size, lflags);
57 else
58 ptr = __vmalloc(size, lflags, PAGE_KERNEL);
59 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
60 return ptr;
61 if (!(++retries % 100))
62 printk(KERN_ERR "XFS: possible memory allocation "
63 "deadlock in %s (mode:0x%x)\n",
64 __FUNCTION__, lflags);
65 blk_congestion_wait(WRITE, HZ/50);
66 } while (1);
67}
68
69void *
70kmem_zalloc(size_t size, int flags)
71{
72 void *ptr;
73
74 ptr = kmem_alloc(size, flags);
75 if (ptr)
76 memset((char *)ptr, 0, (int)size);
77 return ptr;
78}
79
80void
81kmem_free(void *ptr, size_t size)
82{
83 if (((unsigned long)ptr < VMALLOC_START) ||
84 ((unsigned long)ptr >= VMALLOC_END)) {
85 kfree(ptr);
86 } else {
87 vfree(ptr);
88 }
89}
90
91void *
92kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
93{
94 void *new;
95
96 new = kmem_alloc(newsize, flags);
97 if (ptr) {
98 if (new)
99 memcpy(new, ptr,
100 ((oldsize < newsize) ? oldsize : newsize));
101 kmem_free(ptr, oldsize);
102 }
103 return new;
104}
105
106void *
107kmem_zone_alloc(kmem_zone_t *zone, int flags)
108{
109 int retries = 0;
110 int lflags = kmem_flags_convert(flags);
111 void *ptr;
112
113 do {
114 ptr = kmem_cache_alloc(zone, lflags);
115 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
116 return ptr;
117 if (!(++retries % 100))
118 printk(KERN_ERR "XFS: possible memory allocation "
119 "deadlock in %s (mode:0x%x)\n",
120 __FUNCTION__, lflags);
121 blk_congestion_wait(WRITE, HZ/50);
122 } while (1);
123}
124
125void *
126kmem_zone_zalloc(kmem_zone_t *zone, int flags)
127{
128 void *ptr;
129
130 ptr = kmem_zone_alloc(zone, flags);
131 if (ptr)
132 memset((char *)ptr, 0, kmem_cache_size(zone));
133 return ptr;
134}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
new file mode 100644
index 000000000000..1397b669b059
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_KMEM_H__
33#define __XFS_SUPPORT_KMEM_H__
34
35#include <linux/slab.h>
36#include <linux/sched.h>
37#include <linux/mm.h>
38
39/*
40 * memory management routines
41 */
42#define KM_SLEEP 0x0001
43#define KM_NOSLEEP 0x0002
44#define KM_NOFS 0x0004
45#define KM_MAYFAIL 0x0008
46
47#define kmem_zone kmem_cache_s
48#define kmem_zone_t kmem_cache_t
49
50typedef unsigned long xfs_pflags_t;
51
52#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
53#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
54
55#define PFLAGS_SET_NOIO() do { \
56 current->flags |= PF_NOIO; \
57} while (0)
58
59#define PFLAGS_CLEAR_NOIO() do { \
60 current->flags &= ~PF_NOIO; \
61} while (0)
62
63/* these could be nested, so we save state */
64#define PFLAGS_SET_FSTRANS(STATEP) do { \
65 *(STATEP) = current->flags; \
66 current->flags |= PF_FSTRANS; \
67} while (0)
68
69#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
70 *(STATEP) = current->flags; \
71 current->flags &= ~PF_FSTRANS; \
72} while (0)
73
74/* Restore the PF_FSTRANS state to what was saved in STATEP */
75#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \
76 current->flags = ((current->flags & ~PF_FSTRANS) | \
77 (*(STATEP) & PF_FSTRANS)); \
78} while (0)
79
80#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
81 *(NSTATEP) = *(OSTATEP); \
82} while (0)
83
84static __inline unsigned int kmem_flags_convert(int flags)
85{
86 int lflags = __GFP_NOWARN; /* we'll report problems, if need be */
87
88#ifdef DEBUG
89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
90 printk(KERN_WARNING
91 "XFS: memory allocation with wrong flags (%x)\n", flags);
92 BUG();
93 }
94#endif
95
96 if (flags & KM_NOSLEEP) {
97 lflags |= GFP_ATOMIC;
98 } else {
99 lflags |= GFP_KERNEL;
100
101 /* avoid recusive callbacks to filesystem during transactions */
102 if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
103 lflags &= ~__GFP_FS;
104 }
105
106 return lflags;
107}
108
109static __inline kmem_zone_t *
110kmem_zone_init(int size, char *zone_name)
111{
112 return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
113}
114
115static __inline void
116kmem_zone_free(kmem_zone_t *zone, void *ptr)
117{
118 kmem_cache_free(zone, ptr);
119}
120
121static __inline void
122kmem_zone_destroy(kmem_zone_t *zone)
123{
124 if (zone && kmem_cache_destroy(zone))
125 BUG();
126}
127
128extern void *kmem_zone_zalloc(kmem_zone_t *, int);
129extern void *kmem_zone_alloc(kmem_zone_t *, int);
130
131extern void *kmem_alloc(size_t, int);
132extern void *kmem_realloc(void *, size_t, size_t, int);
133extern void *kmem_zalloc(size_t, int);
134extern void kmem_free(void *, size_t);
135
136typedef struct shrinker *kmem_shaker_t;
137typedef int (*kmem_shake_func_t)(int, unsigned int);
138
139static __inline kmem_shaker_t
140kmem_shake_register(kmem_shake_func_t sfunc)
141{
142 return set_shrinker(DEFAULT_SEEKS, sfunc);
143}
144
145static __inline void
146kmem_shake_deregister(kmem_shaker_t shrinker)
147{
148 remove_shrinker(shrinker);
149}
150
151static __inline int
152kmem_shake_allow(unsigned int gfp_mask)
153{
154 return (gfp_mask & __GFP_WAIT);
155}
156
157#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
new file mode 100644
index 000000000000..d2c11a098ff2
--- /dev/null
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_MRLOCK_H__
33#define __XFS_SUPPORT_MRLOCK_H__
34
35#include <linux/rwsem.h>
36
37enum { MR_NONE, MR_ACCESS, MR_UPDATE };
38
39typedef struct {
40 struct rw_semaphore mr_lock;
41 int mr_writer;
42} mrlock_t;
43
44#define mrinit(mrp, name) \
45 ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
46#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
47#define mrfree(mrp) do { } while (0)
48#define mraccess(mrp) mraccessf(mrp, 0)
49#define mrupdate(mrp) mrupdatef(mrp, 0)
50
51static inline void mraccessf(mrlock_t *mrp, int flags)
52{
53 down_read(&mrp->mr_lock);
54}
55
56static inline void mrupdatef(mrlock_t *mrp, int flags)
57{
58 down_write(&mrp->mr_lock);
59 mrp->mr_writer = 1;
60}
61
62static inline int mrtryaccess(mrlock_t *mrp)
63{
64 return down_read_trylock(&mrp->mr_lock);
65}
66
67static inline int mrtryupdate(mrlock_t *mrp)
68{
69 if (!down_write_trylock(&mrp->mr_lock))
70 return 0;
71 mrp->mr_writer = 1;
72 return 1;
73}
74
75static inline void mrunlock(mrlock_t *mrp)
76{
77 if (mrp->mr_writer) {
78 mrp->mr_writer = 0;
79 up_write(&mrp->mr_lock);
80 } else {
81 up_read(&mrp->mr_lock);
82 }
83}
84
85static inline void mrdemote(mrlock_t *mrp)
86{
87 mrp->mr_writer = 0;
88 downgrade_write(&mrp->mr_lock);
89}
90
91#ifdef DEBUG
92/*
93 * Debug-only routine, without some platform-specific asm code, we can
94 * now only answer requests regarding whether we hold the lock for write
95 * (reader state is outside our visibility, we only track writer state).
96 * Note: means !ismrlocked would give false positivies, so don't do that.
97 */
98static inline int ismrlocked(mrlock_t *mrp, int type)
99{
100 if (mrp && type == MR_UPDATE)
101 return mrp->mr_writer;
102 return 1;
103}
104#endif
105
106#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
new file mode 100644
index 000000000000..0b296bb944cb
--- /dev/null
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_MUTEX_H__
33#define __XFS_SUPPORT_MUTEX_H__
34
35#include <linux/spinlock.h>
36#include <asm/semaphore.h>
37
38/*
39 * Map the mutex'es from IRIX to Linux semaphores.
40 *
41 * Destroy just simply initializes to -99 which should block all other
42 * callers.
43 */
44#define MUTEX_DEFAULT 0x0
45typedef struct semaphore mutex_t;
46
47#define mutex_init(lock, type, name) sema_init(lock, 1)
48#define mutex_destroy(lock) sema_init(lock, -99)
49#define mutex_lock(lock, num) down(lock)
50#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1)
51#define mutex_unlock(lock) up(lock)
52
53#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
new file mode 100644
index 000000000000..30b67b4e1cbf
--- /dev/null
+++ b/fs/xfs/linux-2.6/sema.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SEMA_H__
33#define __XFS_SUPPORT_SEMA_H__
34
35#include <linux/time.h>
36#include <linux/wait.h>
37#include <asm/atomic.h>
38#include <asm/semaphore.h>
39
40/*
41 * sema_t structure just maps to struct semaphore in Linux kernel.
42 */
43
44typedef struct semaphore sema_t;
45
46#define init_sema(sp, val, c, d) sema_init(sp, val)
47#define initsema(sp, val) sema_init(sp, val)
48#define initnsema(sp, val, name) sema_init(sp, val)
49#define psema(sp, b) down(sp)
50#define vsema(sp) up(sp)
51#define valusema(sp) (atomic_read(&(sp)->count))
52#define freesema(sema)
53
54/*
55 * Map cpsema (try to get the sema) to down_trylock. We need to switch
56 * the return values since cpsema returns 1 (acquired) 0 (failed) and
57 * down_trylock returns the reverse 0 (acquired) 1 (failed).
58 */
59
60#define cpsema(sp) (down_trylock(sp) ? 0 : 1)
61
62/*
63 * Didn't do cvsema(sp). Not sure how to map this to up/down/...
64 * It does a vsema if the values is < 0 other wise nothing.
65 */
66
67#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
new file mode 100644
index 000000000000..bcf60a0b8df0
--- /dev/null
+++ b/fs/xfs/linux-2.6/spin.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SPIN_H__
33#define __XFS_SUPPORT_SPIN_H__
34
35#include <linux/sched.h> /* preempt needs this */
36#include <linux/spinlock.h>
37
38/*
39 * Map lock_t from IRIX to Linux spinlocks.
40 *
41 * We do not make use of lock_t from interrupt context, so we do not
42 * have to worry about disabling interrupts at all (unlike IRIX).
43 */
44
45typedef spinlock_t lock_t;
46
47#define SPLDECL(s) unsigned long s
48
49#define spinlock_init(lock, name) spin_lock_init(lock)
50#define spinlock_destroy(lock)
51#define mutex_spinlock(lock) ({ spin_lock(lock); 0; })
52#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0)
53#define nested_spinlock(lock) spin_lock(lock)
54#define nested_spinunlock(lock) spin_unlock(lock)
55
56#endif /* __XFS_SUPPORT_SPIN_H__ */
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
new file mode 100644
index 000000000000..821d3167e05b
--- /dev/null
+++ b/fs/xfs/linux-2.6/sv.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_SV_H__
33#define __XFS_SUPPORT_SV_H__
34
35#include <linux/wait.h>
36#include <linux/sched.h>
37#include <linux/spinlock.h>
38
39/*
40 * Synchronisation variables.
41 *
42 * (Parameters "pri", "svf" and "rts" are not implemented)
43 */
44
45typedef struct sv_s {
46 wait_queue_head_t waiters;
47} sv_t;
48
49#define SV_FIFO 0x0 /* sv_t is FIFO type */
50#define SV_LIFO 0x2 /* sv_t is LIFO type */
51#define SV_PRIO 0x4 /* sv_t is PRIO type */
52#define SV_KEYED 0x6 /* sv_t is KEYED type */
53#define SV_DEFAULT SV_FIFO
54
55
56static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
57 unsigned long timeout)
58{
59 DECLARE_WAITQUEUE(wait, current);
60
61 add_wait_queue_exclusive(&sv->waiters, &wait);
62 __set_current_state(state);
63 spin_unlock(lock);
64
65 schedule_timeout(timeout);
66
67 remove_wait_queue(&sv->waiters, &wait);
68}
69
70#define init_sv(sv,type,name,flag) \
71 init_waitqueue_head(&(sv)->waiters)
72#define sv_init(sv,flag,name) \
73 init_waitqueue_head(&(sv)->waiters)
74#define sv_destroy(sv) \
75 /*NOTHING*/
76#define sv_wait(sv, pri, lock, s) \
77 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
78#define sv_wait_sig(sv, pri, lock, s) \
79 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
80#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
81 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
82#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
83 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
84#define sv_signal(sv) \
85 wake_up(&(sv)->waiters)
86#define sv_broadcast(sv) \
87 wake_up_all(&(sv)->waiters)
88
89#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
new file mode 100644
index 000000000000..6c6fd0faa8e1
--- /dev/null
+++ b/fs/xfs/linux-2.6/time.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUPPORT_TIME_H__
33#define __XFS_SUPPORT_TIME_H__
34
35#include <linux/sched.h>
36#include <linux/time.h>
37
38typedef struct timespec timespec_t;
39
40static inline void delay(long ticks)
41{
42 set_current_state(TASK_UNINTERRUPTIBLE);
43 schedule_timeout(ticks);
44}
45
46static inline void nanotime(struct timespec *tvp)
47{
48 *tvp = CURRENT_TIME;
49}
50
51#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
new file mode 100644
index 000000000000..76a84758073a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -0,0 +1,1275 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_inum.h"
35#include "xfs_log.h"
36#include "xfs_sb.h"
37#include "xfs_dir.h"
38#include "xfs_dir2.h"
39#include "xfs_trans.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42#include "xfs_bmap_btree.h"
43#include "xfs_alloc_btree.h"
44#include "xfs_ialloc_btree.h"
45#include "xfs_alloc.h"
46#include "xfs_btree.h"
47#include "xfs_attr_sf.h"
48#include "xfs_dir_sf.h"
49#include "xfs_dir2_sf.h"
50#include "xfs_dinode.h"
51#include "xfs_inode.h"
52#include "xfs_error.h"
53#include "xfs_rw.h"
54#include "xfs_iomap.h"
55#include <linux/mpage.h>
56#include <linux/writeback.h>
57
58STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
59STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
60 struct writeback_control *wbc, void *, int, int);
61
62#if defined(XFS_RW_TRACE)
63void
64xfs_page_trace(
65 int tag,
66 struct inode *inode,
67 struct page *page,
68 int mask)
69{
70 xfs_inode_t *ip;
71 bhv_desc_t *bdp;
72 vnode_t *vp = LINVFS_GET_VP(inode);
73 loff_t isize = i_size_read(inode);
74 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
75 int delalloc = -1, unmapped = -1, unwritten = -1;
76
77 if (page_has_buffers(page))
78 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
79
80 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
81 ip = XFS_BHVTOI(bdp);
82 if (!ip->i_rwtrace)
83 return;
84
85 ktrace_enter(ip->i_rwtrace,
86 (void *)((unsigned long)tag),
87 (void *)ip,
88 (void *)inode,
89 (void *)page,
90 (void *)((unsigned long)mask),
91 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
92 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
93 (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
94 (void *)((unsigned long)(isize & 0xffffffff)),
95 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
96 (void *)((unsigned long)(offset & 0xffffffff)),
97 (void *)((unsigned long)delalloc),
98 (void *)((unsigned long)unmapped),
99 (void *)((unsigned long)unwritten),
100 (void *)NULL,
101 (void *)NULL);
102}
103#else
104#define xfs_page_trace(tag, inode, page, mask)
105#endif
106
107void
108linvfs_unwritten_done(
109 struct buffer_head *bh,
110 int uptodate)
111{
112 xfs_buf_t *pb = (xfs_buf_t *)bh->b_private;
113
114 ASSERT(buffer_unwritten(bh));
115 bh->b_end_io = NULL;
116 clear_buffer_unwritten(bh);
117 if (!uptodate)
118 pagebuf_ioerror(pb, EIO);
119 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
120 pagebuf_iodone(pb, 1, 1);
121 }
122 end_buffer_async_write(bh, uptodate);
123}
124
125/*
126 * Issue transactions to convert a buffer range from unwritten
127 * to written extents (buffered IO).
128 */
129STATIC void
130linvfs_unwritten_convert(
131 xfs_buf_t *bp)
132{
133 vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
134 int error;
135
136 BUG_ON(atomic_read(&bp->pb_hold) < 1);
137 VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
138 BMAPI_UNWRITTEN, NULL, NULL, error);
139 XFS_BUF_SET_FSPRIVATE(bp, NULL);
140 XFS_BUF_CLR_IODONE_FUNC(bp);
141 XFS_BUF_UNDATAIO(bp);
142 iput(LINVFS_GET_IP(vp));
143 pagebuf_iodone(bp, 0, 0);
144}
145
146/*
147 * Issue transactions to convert a buffer range from unwritten
148 * to written extents (direct IO).
149 */
150STATIC void
151linvfs_unwritten_convert_direct(
152 struct inode *inode,
153 loff_t offset,
154 ssize_t size,
155 void *private)
156{
157 ASSERT(!private || inode == (struct inode *)private);
158
159 /* private indicates an unwritten extent lay beneath this IO */
160 if (private && size > 0) {
161 vnode_t *vp = LINVFS_GET_VP(inode);
162 int error;
163
164 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
165 }
166}
167
168STATIC int
169xfs_map_blocks(
170 struct inode *inode,
171 loff_t offset,
172 ssize_t count,
173 xfs_iomap_t *mapp,
174 int flags)
175{
176 vnode_t *vp = LINVFS_GET_VP(inode);
177 int error, nmaps = 1;
178
179 VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
180 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
181 VMODIFY(vp);
182 return -error;
183}
184
185/*
186 * Finds the corresponding mapping in block @map array of the
187 * given @offset within a @page.
188 */
189STATIC xfs_iomap_t *
190xfs_offset_to_map(
191 struct page *page,
192 xfs_iomap_t *iomapp,
193 unsigned long offset)
194{
195 loff_t full_offset; /* offset from start of file */
196
197 ASSERT(offset < PAGE_CACHE_SIZE);
198
199 full_offset = page->index; /* NB: using 64bit number */
200 full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */
201 full_offset += offset; /* offset from page start */
202
203 if (full_offset < iomapp->iomap_offset)
204 return NULL;
205 if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
206 return iomapp;
207 return NULL;
208}
209
210STATIC void
211xfs_map_at_offset(
212 struct page *page,
213 struct buffer_head *bh,
214 unsigned long offset,
215 int block_bits,
216 xfs_iomap_t *iomapp)
217{
218 xfs_daddr_t bn;
219 loff_t delta;
220 int sector_shift;
221
222 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
223 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
224 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
225
226 delta = page->index;
227 delta <<= PAGE_CACHE_SHIFT;
228 delta += offset;
229 delta -= iomapp->iomap_offset;
230 delta >>= block_bits;
231
232 sector_shift = block_bits - BBSHIFT;
233 bn = iomapp->iomap_bn >> sector_shift;
234 bn += delta;
235 BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
236 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
237
238 lock_buffer(bh);
239 bh->b_blocknr = bn;
240 bh->b_bdev = iomapp->iomap_target->pbr_bdev;
241 set_buffer_mapped(bh);
242 clear_buffer_delay(bh);
243}
244
245/*
246 * Look for a page at index which is unlocked and contains our
247 * unwritten extent flagged buffers at its head. Returns page
248 * locked and with an extra reference count, and length of the
249 * unwritten extent component on this page that we can write,
250 * in units of filesystem blocks.
251 */
252STATIC struct page *
253xfs_probe_unwritten_page(
254 struct address_space *mapping,
255 pgoff_t index,
256 xfs_iomap_t *iomapp,
257 xfs_buf_t *pb,
258 unsigned long max_offset,
259 unsigned long *fsbs,
260 unsigned int bbits)
261{
262 struct page *page;
263
264 page = find_trylock_page(mapping, index);
265 if (!page)
266 return NULL;
267 if (PageWriteback(page))
268 goto out;
269
270 if (page->mapping && page_has_buffers(page)) {
271 struct buffer_head *bh, *head;
272 unsigned long p_offset = 0;
273
274 *fsbs = 0;
275 bh = head = page_buffers(page);
276 do {
277 if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
278 break;
279 if (!xfs_offset_to_map(page, iomapp, p_offset))
280 break;
281 if (p_offset >= max_offset)
282 break;
283 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
284 set_buffer_unwritten_io(bh);
285 bh->b_private = pb;
286 p_offset += bh->b_size;
287 (*fsbs)++;
288 } while ((bh = bh->b_this_page) != head);
289
290 if (p_offset)
291 return page;
292 }
293
294out:
295 unlock_page(page);
296 return NULL;
297}
298
299/*
300 * Look for a page at index which is unlocked and not mapped
301 * yet - clustering for mmap write case.
302 */
303STATIC unsigned int
304xfs_probe_unmapped_page(
305 struct address_space *mapping,
306 pgoff_t index,
307 unsigned int pg_offset)
308{
309 struct page *page;
310 int ret = 0;
311
312 page = find_trylock_page(mapping, index);
313 if (!page)
314 return 0;
315 if (PageWriteback(page))
316 goto out;
317
318 if (page->mapping && PageDirty(page)) {
319 if (page_has_buffers(page)) {
320 struct buffer_head *bh, *head;
321
322 bh = head = page_buffers(page);
323 do {
324 if (buffer_mapped(bh) || !buffer_uptodate(bh))
325 break;
326 ret += bh->b_size;
327 if (ret >= pg_offset)
328 break;
329 } while ((bh = bh->b_this_page) != head);
330 } else
331 ret = PAGE_CACHE_SIZE;
332 }
333
334out:
335 unlock_page(page);
336 return ret;
337}
338
339STATIC unsigned int
340xfs_probe_unmapped_cluster(
341 struct inode *inode,
342 struct page *startpage,
343 struct buffer_head *bh,
344 struct buffer_head *head)
345{
346 pgoff_t tindex, tlast, tloff;
347 unsigned int pg_offset, len, total = 0;
348 struct address_space *mapping = inode->i_mapping;
349
350 /* First sum forwards in this page */
351 do {
352 if (buffer_mapped(bh))
353 break;
354 total += bh->b_size;
355 } while ((bh = bh->b_this_page) != head);
356
357 /* If we reached the end of the page, sum forwards in
358 * following pages.
359 */
360 if (bh == head) {
361 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
362 /* Prune this back to avoid pathological behavior */
363 tloff = min(tlast, startpage->index + 64);
364 for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
365 len = xfs_probe_unmapped_page(mapping, tindex,
366 PAGE_CACHE_SIZE);
367 if (!len)
368 return total;
369 total += len;
370 }
371 if (tindex == tlast &&
372 (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
373 total += xfs_probe_unmapped_page(mapping,
374 tindex, pg_offset);
375 }
376 }
377 return total;
378}
379
380/*
381 * Probe for a given page (index) in the inode and test if it is delayed
382 * and without unwritten buffers. Returns page locked and with an extra
383 * reference count.
384 */
385STATIC struct page *
386xfs_probe_delalloc_page(
387 struct inode *inode,
388 pgoff_t index)
389{
390 struct page *page;
391
392 page = find_trylock_page(inode->i_mapping, index);
393 if (!page)
394 return NULL;
395 if (PageWriteback(page))
396 goto out;
397
398 if (page->mapping && page_has_buffers(page)) {
399 struct buffer_head *bh, *head;
400 int acceptable = 0;
401
402 bh = head = page_buffers(page);
403 do {
404 if (buffer_unwritten(bh)) {
405 acceptable = 0;
406 break;
407 } else if (buffer_delay(bh)) {
408 acceptable = 1;
409 }
410 } while ((bh = bh->b_this_page) != head);
411
412 if (acceptable)
413 return page;
414 }
415
416out:
417 unlock_page(page);
418 return NULL;
419}
420
421STATIC int
422xfs_map_unwritten(
423 struct inode *inode,
424 struct page *start_page,
425 struct buffer_head *head,
426 struct buffer_head *curr,
427 unsigned long p_offset,
428 int block_bits,
429 xfs_iomap_t *iomapp,
430 struct writeback_control *wbc,
431 int startio,
432 int all_bh)
433{
434 struct buffer_head *bh = curr;
435 xfs_iomap_t *tmp;
436 xfs_buf_t *pb;
437 loff_t offset, size;
438 unsigned long nblocks = 0;
439
440 offset = start_page->index;
441 offset <<= PAGE_CACHE_SHIFT;
442 offset += p_offset;
443
444 /* get an "empty" pagebuf to manage IO completion
445 * Proper values will be set before returning */
446 pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
447 if (!pb)
448 return -EAGAIN;
449
450 /* Take a reference to the inode to prevent it from
451 * being reclaimed while we have outstanding unwritten
452 * extent IO on it.
453 */
454 if ((igrab(inode)) != inode) {
455 pagebuf_free(pb);
456 return -EAGAIN;
457 }
458
459 /* Set the count to 1 initially, this will stop an I/O
460 * completion callout which happens before we have started
461 * all the I/O from calling pagebuf_iodone too early.
462 */
463 atomic_set(&pb->pb_io_remaining, 1);
464
465 /* First map forwards in the page consecutive buffers
466 * covering this unwritten extent
467 */
468 do {
469 if (!buffer_unwritten(bh))
470 break;
471 tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
472 if (!tmp)
473 break;
474 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
475 set_buffer_unwritten_io(bh);
476 bh->b_private = pb;
477 p_offset += bh->b_size;
478 nblocks++;
479 } while ((bh = bh->b_this_page) != head);
480
481 atomic_add(nblocks, &pb->pb_io_remaining);
482
483 /* If we reached the end of the page, map forwards in any
484 * following pages which are also covered by this extent.
485 */
486 if (bh == head) {
487 struct address_space *mapping = inode->i_mapping;
488 pgoff_t tindex, tloff, tlast;
489 unsigned long bs;
490 unsigned int pg_offset, bbits = inode->i_blkbits;
491 struct page *page;
492
493 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
494 tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
495 tloff = min(tlast, tloff);
496 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
497 page = xfs_probe_unwritten_page(mapping,
498 tindex, iomapp, pb,
499 PAGE_CACHE_SIZE, &bs, bbits);
500 if (!page)
501 break;
502 nblocks += bs;
503 atomic_add(bs, &pb->pb_io_remaining);
504 xfs_convert_page(inode, page, iomapp, wbc, pb,
505 startio, all_bh);
506 /* stop if converting the next page might add
507 * enough blocks that the corresponding byte
508 * count won't fit in our ulong page buf length */
509 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
510 goto enough;
511 }
512
513 if (tindex == tlast &&
514 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
515 page = xfs_probe_unwritten_page(mapping,
516 tindex, iomapp, pb,
517 pg_offset, &bs, bbits);
518 if (page) {
519 nblocks += bs;
520 atomic_add(bs, &pb->pb_io_remaining);
521 xfs_convert_page(inode, page, iomapp, wbc, pb,
522 startio, all_bh);
523 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
524 goto enough;
525 }
526 }
527 }
528
529enough:
530 size = nblocks; /* NB: using 64bit number here */
531 size <<= block_bits; /* convert fsb's to byte range */
532
533 XFS_BUF_DATAIO(pb);
534 XFS_BUF_ASYNC(pb);
535 XFS_BUF_SET_SIZE(pb, size);
536 XFS_BUF_SET_COUNT(pb, size);
537 XFS_BUF_SET_OFFSET(pb, offset);
538 XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
539 XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
540
541 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
542 pagebuf_iodone(pb, 1, 1);
543 }
544
545 return 0;
546}
547
548STATIC void
549xfs_submit_page(
550 struct page *page,
551 struct writeback_control *wbc,
552 struct buffer_head *bh_arr[],
553 int bh_count,
554 int probed_page,
555 int clear_dirty)
556{
557 struct buffer_head *bh;
558 int i;
559
560 BUG_ON(PageWriteback(page));
561 set_page_writeback(page);
562 if (clear_dirty)
563 clear_page_dirty(page);
564 unlock_page(page);
565
566 if (bh_count) {
567 for (i = 0; i < bh_count; i++) {
568 bh = bh_arr[i];
569 mark_buffer_async_write(bh);
570 if (buffer_unwritten(bh))
571 set_buffer_unwritten_io(bh);
572 set_buffer_uptodate(bh);
573 clear_buffer_dirty(bh);
574 }
575
576 for (i = 0; i < bh_count; i++)
577 submit_bh(WRITE, bh_arr[i]);
578
579 if (probed_page && clear_dirty)
580 wbc->nr_to_write--; /* Wrote an "extra" page */
581 } else {
582 end_page_writeback(page);
583 wbc->pages_skipped++; /* We didn't write this page */
584 }
585}
586
587/*
588 * Allocate & map buffers for page given the extent map. Write it out.
589 * except for the original page of a writepage, this is called on
590 * delalloc/unwritten pages only, for the original page it is possible
591 * that the page has no mapping at all.
592 */
593STATIC void
594xfs_convert_page(
595 struct inode *inode,
596 struct page *page,
597 xfs_iomap_t *iomapp,
598 struct writeback_control *wbc,
599 void *private,
600 int startio,
601 int all_bh)
602{
603 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
604 xfs_iomap_t *mp = iomapp, *tmp;
605 unsigned long end, offset;
606 pgoff_t end_index;
607 int i = 0, index = 0;
608 int bbits = inode->i_blkbits;
609
610 end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
611 if (page->index < end_index) {
612 end = PAGE_CACHE_SIZE;
613 } else {
614 end = i_size_read(inode) & (PAGE_CACHE_SIZE-1);
615 }
616 bh = head = page_buffers(page);
617 do {
618 offset = i << bbits;
619 if (offset >= end)
620 break;
621 if (!(PageUptodate(page) || buffer_uptodate(bh)))
622 continue;
623 if (buffer_mapped(bh) && all_bh &&
624 !(buffer_unwritten(bh) || buffer_delay(bh))) {
625 if (startio) {
626 lock_buffer(bh);
627 bh_arr[index++] = bh;
628 }
629 continue;
630 }
631 tmp = xfs_offset_to_map(page, mp, offset);
632 if (!tmp)
633 continue;
634 ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
635 ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
636
637 /* If this is a new unwritten extent buffer (i.e. one
 638 * that we haven't passed in private data for), we must
639 * now map this buffer too.
640 */
641 if (buffer_unwritten(bh) && !bh->b_end_io) {
642 ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
643 xfs_map_unwritten(inode, page, head, bh, offset,
644 bbits, tmp, wbc, startio, all_bh);
645 } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
646 xfs_map_at_offset(page, bh, offset, bbits, tmp);
647 if (buffer_unwritten(bh)) {
648 set_buffer_unwritten_io(bh);
649 bh->b_private = private;
650 ASSERT(private);
651 }
652 }
653 if (startio) {
654 bh_arr[index++] = bh;
655 } else {
656 set_buffer_dirty(bh);
657 unlock_buffer(bh);
658 mark_buffer_dirty(bh);
659 }
660 } while (i++, (bh = bh->b_this_page) != head);
661
662 if (startio) {
663 xfs_submit_page(page, wbc, bh_arr, index, 1, index == i);
664 } else {
665 unlock_page(page);
666 }
667}
668
669/*
670 * Convert & write out a cluster of pages in the same extent as defined
671 * by mp and following the start page.
672 */
673STATIC void
674xfs_cluster_write(
675 struct inode *inode,
676 pgoff_t tindex,
677 xfs_iomap_t *iomapp,
678 struct writeback_control *wbc,
679 int startio,
680 int all_bh,
681 pgoff_t tlast)
682{
683 struct page *page;
684
685 for (; tindex <= tlast; tindex++) {
686 page = xfs_probe_delalloc_page(inode, tindex);
687 if (!page)
688 break;
689 xfs_convert_page(inode, page, iomapp, wbc, NULL,
690 startio, all_bh);
691 }
692}
693
694/*
695 * Calling this without startio set means we are being asked to make a dirty
 696 * page ready for freeing its buffers. When called with startio set then
697 * we are coming from writepage.
698 *
699 * When called with startio set it is important that we write the WHOLE
700 * page if possible.
701 * The bh->b_state's cannot know if any of the blocks or which block for
702 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 703 * only valid if the page itself isn't completely uptodate. Some layers
704 * may clear the page dirty flag prior to calling write page, under the
705 * assumption the entire page will be written out; by not writing out the
706 * whole page the page can be reused before all valid dirty data is
707 * written out. Note: in the case of a page that has been dirty'd by
 708 * mapwrite but only partially set up by block_prepare_write the
709 * bh->b_states's will not agree and only ones setup by BPW/BCW will have
 710 * valid state, thus the whole page must be written out.
711 */
712
713STATIC int
714xfs_page_state_convert(
715 struct inode *inode,
716 struct page *page,
717 struct writeback_control *wbc,
718 int startio,
719 int unmapped) /* also implies page uptodate */
720{
721 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
722 xfs_iomap_t *iomp, iomap;
723 loff_t offset;
724 unsigned long p_offset = 0;
725 __uint64_t end_offset;
726 pgoff_t end_index, last_index, tlast;
727 int len, err, i, cnt = 0, uptodate = 1;
728 int flags = startio ? 0 : BMAPI_TRYLOCK;
729 int page_dirty, delalloc = 0;
730
731 /* Is this page beyond the end of the file? */
732 offset = i_size_read(inode);
733 end_index = offset >> PAGE_CACHE_SHIFT;
734 last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
735 if (page->index >= end_index) {
736 if ((page->index >= end_index + 1) ||
737 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
738 err = -EIO;
739 goto error;
740 }
741 }
742
743 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
744 end_offset = min_t(unsigned long long,
745 offset + PAGE_CACHE_SIZE, i_size_read(inode));
746
747 bh = head = page_buffers(page);
748 iomp = NULL;
749
750 /*
751 * page_dirty is initially a count of buffers on the page and
 752 * is decremented as we move each into a cleanable state.
753 */
754 len = bh->b_size;
755 page_dirty = PAGE_CACHE_SIZE / len;
756
757 do {
758 if (offset >= end_offset)
759 break;
760 if (!buffer_uptodate(bh))
761 uptodate = 0;
762 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
763 continue;
764
765 if (iomp) {
766 iomp = xfs_offset_to_map(page, &iomap, p_offset);
767 }
768
769 /*
770 * First case, map an unwritten extent and prepare for
771 * extent state conversion transaction on completion.
772 */
773 if (buffer_unwritten(bh)) {
774 if (!startio)
775 continue;
776 if (!iomp) {
777 err = xfs_map_blocks(inode, offset, len, &iomap,
778 BMAPI_READ|BMAPI_IGNSTATE);
779 if (err) {
780 goto error;
781 }
782 iomp = xfs_offset_to_map(page, &iomap,
783 p_offset);
784 }
785 if (iomp) {
786 if (!bh->b_end_io) {
787 err = xfs_map_unwritten(inode, page,
788 head, bh, p_offset,
789 inode->i_blkbits, iomp,
790 wbc, startio, unmapped);
791 if (err) {
792 goto error;
793 }
794 } else {
795 set_bit(BH_Lock, &bh->b_state);
796 }
797 BUG_ON(!buffer_locked(bh));
798 bh_arr[cnt++] = bh;
799 page_dirty--;
800 }
801 /*
802 * Second case, allocate space for a delalloc buffer.
803 * We can return EAGAIN here in the release page case.
804 */
805 } else if (buffer_delay(bh)) {
806 if (!iomp) {
807 delalloc = 1;
808 err = xfs_map_blocks(inode, offset, len, &iomap,
809 BMAPI_ALLOCATE | flags);
810 if (err) {
811 goto error;
812 }
813 iomp = xfs_offset_to_map(page, &iomap,
814 p_offset);
815 }
816 if (iomp) {
817 xfs_map_at_offset(page, bh, p_offset,
818 inode->i_blkbits, iomp);
819 if (startio) {
820 bh_arr[cnt++] = bh;
821 } else {
822 set_buffer_dirty(bh);
823 unlock_buffer(bh);
824 mark_buffer_dirty(bh);
825 }
826 page_dirty--;
827 }
828 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
829 (unmapped || startio)) {
830
831 if (!buffer_mapped(bh)) {
832 int size;
833
834 /*
835 * Getting here implies an unmapped buffer
836 * was found, and we are in a path where we
837 * need to write the whole page out.
838 */
839 if (!iomp) {
840 size = xfs_probe_unmapped_cluster(
841 inode, page, bh, head);
842 err = xfs_map_blocks(inode, offset,
843 size, &iomap,
844 BMAPI_WRITE|BMAPI_MMAP);
845 if (err) {
846 goto error;
847 }
848 iomp = xfs_offset_to_map(page, &iomap,
849 p_offset);
850 }
851 if (iomp) {
852 xfs_map_at_offset(page,
853 bh, p_offset,
854 inode->i_blkbits, iomp);
855 if (startio) {
856 bh_arr[cnt++] = bh;
857 } else {
858 set_buffer_dirty(bh);
859 unlock_buffer(bh);
860 mark_buffer_dirty(bh);
861 }
862 page_dirty--;
863 }
864 } else if (startio) {
865 if (buffer_uptodate(bh) &&
866 !test_and_set_bit(BH_Lock, &bh->b_state)) {
867 bh_arr[cnt++] = bh;
868 page_dirty--;
869 }
870 }
871 }
872 } while (offset += len, p_offset += len,
873 ((bh = bh->b_this_page) != head));
874
875 if (uptodate && bh == head)
876 SetPageUptodate(page);
877
878 if (startio)
879 xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1);
880
881 if (iomp) {
882 tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
883 PAGE_CACHE_SHIFT;
884 if (delalloc && (tlast > last_index))
885 tlast = last_index;
886 xfs_cluster_write(inode, page->index + 1, iomp, wbc,
887 startio, unmapped, tlast);
888 }
889
890 return page_dirty;
891
892error:
893 for (i = 0; i < cnt; i++) {
894 unlock_buffer(bh_arr[i]);
895 }
896
897 /*
898 * If it's delalloc and we have nowhere to put it,
899 * throw it away, unless the lower layers told
900 * us to try again.
901 */
902 if (err != -EAGAIN) {
903 if (!unmapped) {
904 block_invalidatepage(page, 0);
905 }
906 ClearPageUptodate(page);
907 }
908 return err;
909}
910
911STATIC int
912__linvfs_get_block(
913 struct inode *inode,
914 sector_t iblock,
915 unsigned long blocks,
916 struct buffer_head *bh_result,
917 int create,
918 int direct,
919 bmapi_flags_t flags)
920{
921 vnode_t *vp = LINVFS_GET_VP(inode);
922 xfs_iomap_t iomap;
923 int retpbbm = 1;
924 int error;
925 ssize_t size;
926 loff_t offset = (loff_t)iblock << inode->i_blkbits;
927
928 if (blocks)
929 size = blocks << inode->i_blkbits;
930 else
931 size = 1 << inode->i_blkbits;
932
933 VOP_BMAP(vp, offset, size,
934 create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
935 if (error)
936 return -error;
937
938 if (retpbbm == 0)
939 return 0;
940
941 if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
942 xfs_daddr_t bn;
943 loff_t delta;
944
945 /* For unwritten extents do not report a disk address on
946 * the read case (treat as if we're reading into a hole).
947 */
948 if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
949 delta = offset - iomap.iomap_offset;
950 delta >>= inode->i_blkbits;
951
952 bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
953 bn += delta;
954 BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME));
955 bh_result->b_blocknr = bn;
956 set_buffer_mapped(bh_result);
957 }
958 if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
959 if (direct)
960 bh_result->b_private = inode;
961 set_buffer_unwritten(bh_result);
962 set_buffer_delay(bh_result);
963 }
964 }
965
966 /* If this is a realtime file, data might be on a new device */
967 bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
968
969 /* If we previously allocated a block out beyond eof and
970 * we are now coming back to use it then we will need to
971 * flag it as new even if it has a disk address.
972 */
973 if (create &&
974 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
975 (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) {
976 set_buffer_new(bh_result);
977 }
978
979 if (iomap.iomap_flags & IOMAP_DELAY) {
980 BUG_ON(direct);
981 if (create) {
982 set_buffer_uptodate(bh_result);
983 set_buffer_mapped(bh_result);
984 set_buffer_delay(bh_result);
985 }
986 }
987
988 if (blocks) {
989 bh_result->b_size = (ssize_t)min(
990 (loff_t)(iomap.iomap_bsize - iomap.iomap_delta),
991 (loff_t)(blocks << inode->i_blkbits));
992 }
993
994 return 0;
995}
996
997int
998linvfs_get_block(
999 struct inode *inode,
1000 sector_t iblock,
1001 struct buffer_head *bh_result,
1002 int create)
1003{
1004 return __linvfs_get_block(inode, iblock, 0, bh_result,
1005 create, 0, BMAPI_WRITE);
1006}
1007
1008STATIC int
1009linvfs_get_blocks_direct(
1010 struct inode *inode,
1011 sector_t iblock,
1012 unsigned long max_blocks,
1013 struct buffer_head *bh_result,
1014 int create)
1015{
1016 return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
1017 create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1018}
1019
1020STATIC ssize_t
1021linvfs_direct_IO(
1022 int rw,
1023 struct kiocb *iocb,
1024 const struct iovec *iov,
1025 loff_t offset,
1026 unsigned long nr_segs)
1027{
1028 struct file *file = iocb->ki_filp;
1029 struct inode *inode = file->f_mapping->host;
1030 vnode_t *vp = LINVFS_GET_VP(inode);
1031 xfs_iomap_t iomap;
1032 int maps = 1;
1033 int error;
1034
1035 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
1036 if (error)
1037 return -error;
1038
1039 return blockdev_direct_IO_own_locking(rw, iocb, inode,
1040 iomap.iomap_target->pbr_bdev,
1041 iov, offset, nr_segs,
1042 linvfs_get_blocks_direct,
1043 linvfs_unwritten_convert_direct);
1044}
1045
1046
1047STATIC sector_t
1048linvfs_bmap(
1049 struct address_space *mapping,
1050 sector_t block)
1051{
1052 struct inode *inode = (struct inode *)mapping->host;
1053 vnode_t *vp = LINVFS_GET_VP(inode);
1054 int error;
1055
1056 vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
1057
1058 VOP_RWLOCK(vp, VRWLOCK_READ);
1059 VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
1060 VOP_RWUNLOCK(vp, VRWLOCK_READ);
1061 return generic_block_bmap(mapping, block, linvfs_get_block);
1062}
1063
1064STATIC int
1065linvfs_readpage(
1066 struct file *unused,
1067 struct page *page)
1068{
1069 return mpage_readpage(page, linvfs_get_block);
1070}
1071
1072STATIC int
1073linvfs_readpages(
1074 struct file *unused,
1075 struct address_space *mapping,
1076 struct list_head *pages,
1077 unsigned nr_pages)
1078{
1079 return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
1080}
1081
1082STATIC void
1083xfs_count_page_state(
1084 struct page *page,
1085 int *delalloc,
1086 int *unmapped,
1087 int *unwritten)
1088{
1089 struct buffer_head *bh, *head;
1090
1091 *delalloc = *unmapped = *unwritten = 0;
1092
1093 bh = head = page_buffers(page);
1094 do {
1095 if (buffer_uptodate(bh) && !buffer_mapped(bh))
1096 (*unmapped) = 1;
1097 else if (buffer_unwritten(bh) && !buffer_delay(bh))
1098 clear_buffer_unwritten(bh);
1099 else if (buffer_unwritten(bh))
1100 (*unwritten) = 1;
1101 else if (buffer_delay(bh))
1102 (*delalloc) = 1;
1103 } while ((bh = bh->b_this_page) != head);
1104}
1105
1106
1107/*
1108 * writepage: Called from one of two places:
1109 *
1110 * 1. we are flushing a delalloc buffer head.
1111 *
1112 * 2. we are writing out a dirty page. Typically the page dirty
 1113 * state is cleared before we get here. In this case it is
1114 * conceivable we have no buffer heads.
1115 *
1116 * For delalloc space on the page we need to allocate space and
1117 * flush it. For unmapped buffer heads on the page we should
1118 * allocate space if the page is uptodate. For any other dirty
1119 * buffer heads on the page we should flush them.
1120 *
1121 * If we detect that a transaction would be required to flush
1122 * the page, we have to check the process flags first, if we
1123 * are already in a transaction or disk I/O during allocations
1124 * is off, we need to fail the writepage and redirty the page.
1125 */
1126
1127STATIC int
1128linvfs_writepage(
1129 struct page *page,
1130 struct writeback_control *wbc)
1131{
1132 int error;
1133 int need_trans;
1134 int delalloc, unmapped, unwritten;
1135 struct inode *inode = page->mapping->host;
1136
1137 xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
1138
1139 /*
1140 * We need a transaction if:
1141 * 1. There are delalloc buffers on the page
1142 * 2. The page is uptodate and we have unmapped buffers
1143 * 3. The page is uptodate and we have no buffers
1144 * 4. There are unwritten buffers on the page
1145 */
1146
1147 if (!page_has_buffers(page)) {
1148 unmapped = 1;
1149 need_trans = 1;
1150 } else {
1151 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1152 if (!PageUptodate(page))
1153 unmapped = 0;
1154 need_trans = delalloc + unmapped + unwritten;
1155 }
1156
1157 /*
1158 * If we need a transaction and the process flags say
1159 * we are already in a transaction, or no IO is allowed
1160 * then mark the page dirty again and leave the page
1161 * as is.
1162 */
1163 if (PFLAGS_TEST_FSTRANS() && need_trans)
1164 goto out_fail;
1165
1166 /*
1167 * Delay hooking up buffer heads until we have
1168 * made our go/no-go decision.
1169 */
1170 if (!page_has_buffers(page))
1171 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1172
1173 /*
1174 * Convert delayed allocate, unwritten or unmapped space
1175 * to real space and flush out to disk.
1176 */
1177 error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
1178 if (error == -EAGAIN)
1179 goto out_fail;
1180 if (unlikely(error < 0))
1181 goto out_unlock;
1182
1183 return 0;
1184
1185out_fail:
1186 redirty_page_for_writepage(wbc, page);
1187 unlock_page(page);
1188 return 0;
1189out_unlock:
1190 unlock_page(page);
1191 return error;
1192}
1193
1194/*
1195 * Called to move a page into cleanable state - and from there
1196 * to be released. Possibly the page is already clean. We always
1197 * have buffer heads in this call.
1198 *
1199 * Returns 0 if the page is ok to release, 1 otherwise.
1200 *
1201 * Possible scenarios are:
1202 *
1203 * 1. We are being called to release a page which has been written
1204 * to via regular I/O. buffer heads will be dirty and possibly
1205 * delalloc. If no delalloc buffer heads in this case then we
1206 * can just return zero.
1207 *
1208 * 2. We are called to release a page which has been written via
1209 * mmap, all we need to do is ensure there is no delalloc
1210 * state in the buffer heads, if not we can let the caller
1211 * free them and we should come back later via writepage.
1212 */
1213STATIC int
1214linvfs_release_page(
1215 struct page *page,
1216 int gfp_mask)
1217{
1218 struct inode *inode = page->mapping->host;
1219 int dirty, delalloc, unmapped, unwritten;
1220 struct writeback_control wbc = {
1221 .sync_mode = WB_SYNC_ALL,
1222 .nr_to_write = 1,
1223 };
1224
1225 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
1226
1227 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
1228 if (!delalloc && !unwritten)
1229 goto free_buffers;
1230
1231 if (!(gfp_mask & __GFP_FS))
1232 return 0;
1233
1234 /* If we are already inside a transaction or the thread cannot
1235 * do I/O, we cannot release this page.
1236 */
1237 if (PFLAGS_TEST_FSTRANS())
1238 return 0;
1239
1240 /*
1241 * Convert delalloc space to real space, do not flush the
1242 * data out to disk, that will be done by the caller.
1243 * Never need to allocate space here - we will always
1244 * come back to writepage in that case.
1245 */
1246 dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
1247 if (dirty == 0 && !unwritten)
1248 goto free_buffers;
1249 return 0;
1250
1251free_buffers:
1252 return try_to_free_buffers(page);
1253}
1254
1255STATIC int
1256linvfs_prepare_write(
1257 struct file *file,
1258 struct page *page,
1259 unsigned int from,
1260 unsigned int to)
1261{
1262 return block_prepare_write(page, from, to, linvfs_get_block);
1263}
1264
1265struct address_space_operations linvfs_aops = {
1266 .readpage = linvfs_readpage,
1267 .readpages = linvfs_readpages,
1268 .writepage = linvfs_writepage,
1269 .sync_page = block_sync_page,
1270 .releasepage = linvfs_release_page,
1271 .prepare_write = linvfs_prepare_write,
1272 .commit_write = generic_commit_write,
1273 .bmap = linvfs_bmap,
1274 .direct_IO = linvfs_direct_IO,
1275};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
new file mode 100644
index 000000000000..23e0eb67fc25
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -0,0 +1,1980 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * The xfs_buf.c code provides an abstract buffer cache model on top
35 * of the Linux page cache. Cached metadata blocks for a file system
36 * are hashed to the inode for the block device. xfs_buf.c assembles
37 * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
38 *
39 * Written by Steve Lord, Jim Mostek, Russell Cattelan
40 * and Rajagopal Ananthanarayanan ("ananth") at SGI.
41 *
42 */
43
44#include <linux/stddef.h>
45#include <linux/errno.h>
46#include <linux/slab.h>
47#include <linux/pagemap.h>
48#include <linux/init.h>
49#include <linux/vmalloc.h>
50#include <linux/bio.h>
51#include <linux/sysctl.h>
52#include <linux/proc_fs.h>
53#include <linux/workqueue.h>
54#include <linux/percpu.h>
55#include <linux/blkdev.h>
56#include <linux/hash.h>
57
58#include "xfs_linux.h"
59
/*
 * File wide globals
 */

STATIC kmem_cache_t *pagebuf_cache;	/* slab zone all xfs_buf_t come from */
STATIC kmem_shaker_t pagebuf_shake;	/* memory shaker handle */
STATIC int pagebuf_daemon_wakeup(int, unsigned int);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
/* separate completion workqueues for log I/O and data I/O */
STATIC struct workqueue_struct *pagebuf_logio_workqueue;
STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
70
/*
 * Pagebuf debugging
 */

#ifdef PAGEBUF_TRACE
/* Record one buffer event in the global pagebuf ktrace ring. */
void
pagebuf_trace(
	xfs_buf_t	*pb,
	char		*id,
	void		*data,
	void		*ra)
{
	ktrace_enter(pagebuf_trace_buf,
		pb, id,
		(void *)(unsigned long)pb->pb_flags,
		(void *)(unsigned long)pb->pb_hold.counter,
		(void *)(unsigned long)pb->pb_sema.count.counter,
		(void *)current,
		data, ra,
		/* file offset split into two 32-bit halves to fit the slots */
		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
		(void *)(unsigned long)pb->pb_buffer_length,
		NULL, NULL, NULL, NULL, NULL);
}
ktrace_t *pagebuf_trace_buf;
#define PAGEBUF_TRACE_SIZE	4096
#define PB_TRACE(pb, id, data)	\
	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
#else
#define PB_TRACE(pb, id, data)	do { } while (0)
#endif

#ifdef PAGEBUF_LOCK_TRACKING
/* Remember which task last took/dropped pb_sema, for lock debugging */
# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)
# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)
# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)
#else
# define PB_SET_OWNER(pb)	do { } while (0)
# define PB_CLEAR_OWNER(pb)	do { } while (0)
# define PB_GET_OWNER(pb)	do { } while (0)
#endif
112
/*
 * Pagebuf allocation / freeing.
 */

/* Translate pagebuf flags into a page-allocation gfp mask */
#define pb_to_gfp(flags) \
	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)

/* Translate pagebuf flags into a kmem allocation mode */
#define pb_to_km(flags) \
	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)


#define pagebuf_allocate(flags) \
	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
/*
 * No trailing semicolon here: the macro expands to an expression and
 * the previous stray ';' double-terminated statements at call sites
 * (and would break an unbraced if/else).
 */
#define pagebuf_deallocate(pb) \
	kmem_zone_free(pagebuf_cache, (pb))
129
/*
 * Page Region interfaces.
 *
 * For pages in filesystems where the blocksize is smaller than the
 * pagesize, we use the page->private field (long) to hold a bitmap
 * of uptodate regions within the page.
 *
 * Each such region is "bytes per page / bits per long" bytes long.
 *
 * NBPPR == number-of-bytes-per-page-region
 * BTOPR == bytes-to-page-region (rounded up)
 * BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
/* PRSHIFT is log2 of the bytes covered by one region bit */
#if (BITS_PER_LONG == 32)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
#elif (BITS_PER_LONG == 64)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
153
/*
 * Build the page->private bitmask covering the regions spanned by
 * [offset, offset + length) within one page.
 *
 * NOTE(review): if the range rounds to an empty span (final == first,
 * possible only for length < NBPPR given the ASSERTs below) the first
 * shift count becomes BITS_PER_LONG, which is undefined behaviour in C.
 * Callers appear to pass sector-sized (>= NBPPR) ranges only, so this
 * is presumably never hit - confirm before reusing with smaller lengths.
 */
STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	mask;
	int		first, final;

	/* first region at/after offset; region holding the last byte */
	first = BTOPR(offset);
	final = BTOPRT(offset + length - 1);
	first = min(first, final);

	mask = ~0UL;
	mask <<= BITS_PER_LONG - (final - first);
	mask >>= BITS_PER_LONG - (final);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return mask;
}
175
176STATIC inline void
177set_page_region(
178 struct page *page,
179 size_t offset,
180 size_t length)
181{
182 page->private |= page_region_mask(offset, length);
183 if (page->private == ~0UL)
184 SetPageUptodate(page);
185}
186
187STATIC inline int
188test_page_region(
189 struct page *page,
190 size_t offset,
191 size_t length)
192{
193 unsigned long mask = page_region_mask(offset, length);
194
195 return (mask && (page->private & mask) == mask);
196}
197
/*
 * Mapping of multi-page buffers into contiguous virtual space
 */

/* Node on the deferred-vunmap list */
typedef struct a_list {
	void		*vm_addr;	/* vmap'd address awaiting vunmap */
	struct a_list	*next;
} a_list_t;

STATIC a_list_t		*as_free_head;	/* addresses queued for vunmap */
STATIC int		as_list_len;	/* number of queued addresses */
STATIC DEFINE_SPINLOCK(as_lock);	/* protects the two fields above */
210
211/*
212 * Try to batch vunmaps because they are costly.
213 */
214STATIC void
215free_address(
216 void *addr)
217{
218 a_list_t *aentry;
219
220 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
221 if (likely(aentry)) {
222 spin_lock(&as_lock);
223 aentry->next = as_free_head;
224 aentry->vm_addr = addr;
225 as_free_head = aentry;
226 as_list_len++;
227 spin_unlock(&as_lock);
228 } else {
229 vunmap(addr);
230 }
231}
232
233STATIC void
234purge_addresses(void)
235{
236 a_list_t *aentry, *old;
237
238 if (as_free_head == NULL)
239 return;
240
241 spin_lock(&as_lock);
242 aentry = as_free_head;
243 as_free_head = NULL;
244 as_list_len = 0;
245 spin_unlock(&as_lock);
246
247 while ((old = aentry) != NULL) {
248 vunmap(aentry->vm_addr);
249 aentry = aentry->next;
250 kfree(old);
251 }
252}
253
254/*
255 * Internal pagebuf object manipulation
256 */
257
/*
 * Initialize a freshly-allocated xfs_buf_t to cover the byte range
 * [range_base, range_base + range_length) on @target.  No pages are
 * attached here; the caller populates them separately.
 */
STATIC void
_pagebuf_initialize(
	xfs_buf_t		*pb,
	xfs_buftarg_t		*target,
	loff_t			range_base,
	size_t			range_length,
	page_buf_flags_t	flags)
{
	/*
	 * We don't want certain flags to appear in pb->pb_flags.
	 */
	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);

	memset(pb, 0, sizeof(xfs_buf_t));
	atomic_set(&pb->pb_hold, 1);		/* the caller's reference */
	init_MUTEX_LOCKED(&pb->pb_iodonesema);	/* up'ed on I/O completion */
	INIT_LIST_HEAD(&pb->pb_list);
	INIT_LIST_HEAD(&pb->pb_hash_list);
	init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
	PB_SET_OWNER(pb);
	pb->pb_target = target;
	pb->pb_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	pb->pb_buffer_length = pb->pb_count_desired = range_length;
	/* PBF_NONE: no pages known to be uptodate yet */
	pb->pb_flags = flags | PBF_NONE;
	pb->pb_bn = XFS_BUF_DADDR_NULL;		/* no disk address assigned */
	atomic_set(&pb->pb_pin_count, 0);
	init_waitqueue_head(&pb->pb_waiters);

	XFS_STATS_INC(pb_create);
	PB_TRACE(pb, "initialize", target);
}
294
295/*
296 * Allocate a page array capable of holding a specified number
297 * of pages, and point the page buf at it.
298 */
299STATIC int
300_pagebuf_get_pages(
301 xfs_buf_t *pb,
302 int page_count,
303 page_buf_flags_t flags)
304{
305 /* Make sure that we have a page list */
306 if (pb->pb_pages == NULL) {
307 pb->pb_offset = page_buf_poff(pb->pb_file_offset);
308 pb->pb_page_count = page_count;
309 if (page_count <= PB_PAGES) {
310 pb->pb_pages = pb->pb_page_array;
311 } else {
312 pb->pb_pages = kmem_alloc(sizeof(struct page *) *
313 page_count, pb_to_km(flags));
314 if (pb->pb_pages == NULL)
315 return -ENOMEM;
316 }
317 memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
318 }
319 return 0;
320}
321
322/*
323 * Frees pb_pages if it was malloced.
324 */
325STATIC void
326_pagebuf_free_pages(
327 xfs_buf_t *bp)
328{
329 if (bp->pb_pages != bp->pb_page_array) {
330 kmem_free(bp->pb_pages,
331 bp->pb_page_count * sizeof(struct page *));
332 }
333}
334
335/*
336 * Releases the specified buffer.
337 *
338 * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use pagebuf_rele instead for
340 * hashed and refcounted buffers
341 */
void
pagebuf_free(
	xfs_buf_t		*bp)
{
	PB_TRACE(bp, "free", 0);

	ASSERT(list_empty(&bp->pb_hash_list));

	if (bp->pb_flags & _PBF_PAGE_CACHE) {
		uint		i;

		/* multi-page mappings were vmap'd; queue the vunmap */
		if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
			free_address(bp->pb_addr - bp->pb_offset);

		/* drop the page-cache references taken at lookup time */
		for (i = 0; i < bp->pb_page_count; i++)
			page_cache_release(bp->pb_pages[i]);
		_pagebuf_free_pages(bp);
	} else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
		/*
		 * XXX(hch): bp->pb_count_desired might be incorrect (see
		 * pagebuf_associate_memory for details), but fortunately
		 * the Linux version of kmem_free ignores the len argument..
		 */
		kmem_free(bp->pb_addr, bp->pb_count_desired);
		_pagebuf_free_pages(bp);
	}

	pagebuf_deallocate(bp);
}
371
372/*
 * Finds all pages for buffer in question and builds its page list.
374 */
/*
 * Populate bp->pb_pages from the page cache for the buffer's range.
 * page_count starts as the total number of pages and is decremented
 * for every page found not (even partially) uptodate; the remainder
 * decides the PBF_NONE / PBF_PARTIAL state below.
 */
STATIC int
_pagebuf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->pb_target->pbr_mapping;
	size_t			blocksize = bp->pb_target->pbr_bsize;
	size_t			size = bp->pb_count_desired;
	size_t			nbytes, offset;
	int			gfp_mask = pb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	loff_t			end;
	int			error;

	end = bp->pb_file_offset + bp->pb_buffer_length;
	page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);

	error = _pagebuf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->pb_flags |= _PBF_PAGE_CACHE;

	offset = bp->pb_offset;
	first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->pb_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & PBF_READ_AHEAD) {
				/* readahead is best-effort: unlock what we
				 * have so far and bail out */
				bp->pb_page_count = i;
				for (i = 0; i < bp->pb_page_count; i++)
					unlock_page(bp->pb_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"XFS: possible memory allocation "
					"deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, gfp_mask);

			XFS_STATS_INC(pb_page_retries);
			pagebuf_daemon_wakeup(0, gfp_mask);
			blk_congestion_wait(WRITE, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(pb_page_found);

		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
		size -= nbytes;

		if (!PageUptodate(page)) {
			page_count--;
			if (blocksize >= PAGE_CACHE_SIZE) {
				/* keep pages locked while the read runs */
				if (flags & PBF_READ)
					bp->pb_locked = 1;
			} else if (!PagePrivate(page)) {
				/* sub-page blocks: the piece we need may
				 * still be valid per the region bitmap */
				if (test_page_region(page, offset, nbytes))
					page_count++;
			}
		}

		bp->pb_pages[i] = page;
		offset = 0;
	}

	if (!bp->pb_locked) {
		for (i = 0; i < bp->pb_page_count; i++)
			unlock_page(bp->pb_pages[i]);
	}

	if (page_count) {
		/* if we have any uptodate pages, mark that in the buffer */
		bp->pb_flags &= ~PBF_NONE;

		/* if some pages aren't uptodate, mark that in the buffer */
		if (page_count != bp->pb_page_count)
			bp->pb_flags |= PBF_PARTIAL;
	}

	PB_TRACE(bp, "lookup_pages", (long)page_count);
	return error;
}
470
471/*
 * Map buffer into kernel address-space if necessary.
473 */
474STATIC int
475_pagebuf_map_pages(
476 xfs_buf_t *bp,
477 uint flags)
478{
479 /* A single page buffer is always mappable */
480 if (bp->pb_page_count == 1) {
481 bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
482 bp->pb_flags |= PBF_MAPPED;
483 } else if (flags & PBF_MAPPED) {
484 if (as_list_len > 64)
485 purge_addresses();
486 bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
487 VM_MAP, PAGE_KERNEL);
488 if (unlikely(bp->pb_addr == NULL))
489 return -ENOMEM;
490 bp->pb_addr += bp->pb_offset;
491 bp->pb_flags |= PBF_MAPPED;
492 }
493
494 return 0;
495}
496
497/*
498 * Finding and Reading Buffers
499 */
500
501/*
502 * _pagebuf_find
503 *
504 * Looks up, and creates if absent, a lockable buffer for
505 * a given range of an inode. The buffer is returned
506 * locked. If other overlapping buffers exist, they are
507 * released before the new buffer is created and locked,
508 * which may imply that this call will block until those buffers
509 * are unlocked. No I/O is implied by this call.
510 */
xfs_buf_t *
_pagebuf_find(
	xfs_buftarg_t		*btp,	/* block device target		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags,	/* PBF_TRYLOCK			*/
	xfs_buf_t		*new_pb)/* newly allocated buffer	*/
{
	loff_t			range_base;
	size_t			range_length;
	xfs_bufhash_t		*hash;
	xfs_buf_t		*pb, *n;

	/* ioff/isize are in basic-block units; convert via BBSHIFT */
	range_base = (ioff << BBSHIFT);
	range_length = (isize << BBSHIFT);

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(range_length < (1 << btp->pbr_sshift)));
	ASSERT(!(range_base & (loff_t)btp->pbr_smask));

	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];

	spin_lock(&hash->bh_lock);

	list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
		ASSERT(btp == pb->pb_target);
		if (pb->pb_file_offset == range_base &&
		    pb->pb_buffer_length == range_length) {
			/*
			 * If we look at something bring it to the
			 * front of the list for next time.
			 */
			atomic_inc(&pb->pb_hold);
			list_move(&pb->pb_hash_list, &hash->bh_list);
			goto found;
		}
	}

	/* No match found */
	if (new_pb) {
		/* insert the caller-supplied buffer under the hash lock */
		_pagebuf_initialize(new_pb, btp, range_base,
				range_length, flags);
		new_pb->pb_hash = hash;
		list_add(&new_pb->pb_hash_list, &hash->bh_list);
	} else {
		XFS_STATS_INC(pb_miss_locked);
	}

	spin_unlock(&hash->bh_lock);
	return new_pb;

found:
	spin_unlock(&hash->bh_lock);

	/* Attempt to get the semaphore without sleeping,
	 * if this does not work then we need to drop the
	 * spinlock and do a hard attempt on the semaphore.
	 */
	if (down_trylock(&pb->pb_sema)) {
		if (!(flags & PBF_TRYLOCK)) {
			/* wait for buffer ownership */
			PB_TRACE(pb, "get_lock", 0);
			pagebuf_lock(pb);
			XFS_STATS_INC(pb_get_locked_waited);
		} else {
			/* We asked for a trylock and failed, no need
			 * to look at file offset and length here, we
			 * know that this pagebuf at least overlaps our
			 * pagebuf and is locked, therefore our buffer
			 * either does not exist, or is this buffer
			 */

			pagebuf_rele(pb);
			XFS_STATS_INC(pb_busy_locked);
			return (NULL);
		}
	} else {
		/* trylock worked */
		PB_SET_OWNER(pb);
	}

	/* stale buffer found: keep only its mapping state */
	if (pb->pb_flags & PBF_STALE)
		pb->pb_flags &= PBF_MAPPED;
	PB_TRACE(pb, "got_lock", 0);
	XFS_STATS_INC(pb_get_locked);
	return (pb);
}
598
599/*
600 * xfs_buf_get_flags assembles a buffer covering the specified range.
601 *
602 * Storage in memory for all portions of the buffer will be allocated,
603 * although backing storage may not be.
604 */
xfs_buf_t *
xfs_buf_get_flags(			/* allocate a buffer		*/
	xfs_buftarg_t		*target,/* target for buffer		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
{
	xfs_buf_t		*pb, *new_pb;
	int			error = 0, i;

	/* speculative allocation; freed below if the hash already has one */
	new_pb = pagebuf_allocate(flags);
	if (unlikely(!new_pb))
		return NULL;

	pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
	if (pb == new_pb) {
		/* our new buffer was inserted: populate its page array */
		error = _pagebuf_lookup_pages(pb, flags);
		if (error)
			goto no_buffer;
	} else {
		/* existing buffer returned (or NULL on trylock failure) */
		pagebuf_deallocate(new_pb);
		if (unlikely(pb == NULL))
			return NULL;
	}

	for (i = 0; i < pb->pb_page_count; i++)
		mark_page_accessed(pb->pb_pages[i]);

	if (!(pb->pb_flags & PBF_MAPPED)) {
		error = _pagebuf_map_pages(pb, flags);
		if (unlikely(error)) {
			printk(KERN_WARNING "%s: failed to map pages\n",
					__FUNCTION__);
			goto no_buffer;
		}
	}

	XFS_STATS_INC(pb_get);

	/*
	 * Always fill in the block number now, the mapped cases can do
	 * their own overlay of this later.
	 */
	pb->pb_bn = ioff;
	pb->pb_count_desired = pb->pb_buffer_length;

	PB_TRACE(pb, "get", (unsigned long)flags);
	return pb;

 no_buffer:
	if (flags & (PBF_LOCK | PBF_TRYLOCK))
		pagebuf_unlock(pb);
	pagebuf_rele(pb);
	return NULL;
}
660
661xfs_buf_t *
662xfs_buf_read_flags(
663 xfs_buftarg_t *target,
664 loff_t ioff,
665 size_t isize,
666 page_buf_flags_t flags)
667{
668 xfs_buf_t *pb;
669
670 flags |= PBF_READ;
671
672 pb = xfs_buf_get_flags(target, ioff, isize, flags);
673 if (pb) {
674 if (PBF_NOT_DONE(pb)) {
675 PB_TRACE(pb, "read", (unsigned long)flags);
676 XFS_STATS_INC(pb_get_read);
677 pagebuf_iostart(pb, flags);
678 } else if (flags & PBF_ASYNC) {
679 PB_TRACE(pb, "read_async", (unsigned long)flags);
680 /*
681 * Read ahead call which is already satisfied,
682 * drop the buffer
683 */
684 goto no_buffer;
685 } else {
686 PB_TRACE(pb, "read_done", (unsigned long)flags);
687 /* We do not want read in the flags */
688 pb->pb_flags &= ~PBF_READ;
689 }
690 }
691
692 return pb;
693
694 no_buffer:
695 if (flags & (PBF_LOCK | PBF_TRYLOCK))
696 pagebuf_unlock(pb);
697 pagebuf_rele(pb);
698 return NULL;
699}
700
701/*
702 * Create a skeletal pagebuf (no pages associated with it).
703 */
704xfs_buf_t *
705pagebuf_lookup(
706 xfs_buftarg_t *target,
707 loff_t ioff,
708 size_t isize,
709 page_buf_flags_t flags)
710{
711 xfs_buf_t *pb;
712
713 pb = pagebuf_allocate(flags);
714 if (pb) {
715 _pagebuf_initialize(pb, target, ioff, isize, flags);
716 }
717 return pb;
718}
719
720/*
721 * If we are not low on memory then do the readahead in a deadlock
722 * safe manner.
723 */
724void
725pagebuf_readahead(
726 xfs_buftarg_t *target,
727 loff_t ioff,
728 size_t isize,
729 page_buf_flags_t flags)
730{
731 struct backing_dev_info *bdi;
732
733 bdi = target->pbr_mapping->backing_dev_info;
734 if (bdi_read_congested(bdi))
735 return;
736
737 flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
738 xfs_buf_read_flags(target, ioff, isize, flags);
739}
740
741xfs_buf_t *
742pagebuf_get_empty(
743 size_t len,
744 xfs_buftarg_t *target)
745{
746 xfs_buf_t *pb;
747
748 pb = pagebuf_allocate(0);
749 if (pb)
750 _pagebuf_initialize(pb, target, 0, len, 0);
751 return pb;
752}
753
754static inline struct page *
755mem_to_page(
756 void *addr)
757{
758 if (((unsigned long)addr < VMALLOC_START) ||
759 ((unsigned long)addr >= VMALLOC_END)) {
760 return virt_to_page(addr);
761 } else {
762 return vmalloc_to_page(addr);
763 }
764}
765
766int
767pagebuf_associate_memory(
768 xfs_buf_t *pb,
769 void *mem,
770 size_t len)
771{
772 int rval;
773 int i = 0;
774 size_t ptr;
775 size_t end, end_cur;
776 off_t offset;
777 int page_count;
778
779 page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
780 offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
781 if (offset && (len > PAGE_CACHE_SIZE))
782 page_count++;
783
784 /* Free any previous set of page pointers */
785 if (pb->pb_pages)
786 _pagebuf_free_pages(pb);
787
788 pb->pb_pages = NULL;
789 pb->pb_addr = mem;
790
791 rval = _pagebuf_get_pages(pb, page_count, 0);
792 if (rval)
793 return rval;
794
795 pb->pb_offset = offset;
796 ptr = (size_t) mem & PAGE_CACHE_MASK;
797 end = PAGE_CACHE_ALIGN((size_t) mem + len);
798 end_cur = end;
799 /* set up first page */
800 pb->pb_pages[0] = mem_to_page(mem);
801
802 ptr += PAGE_CACHE_SIZE;
803 pb->pb_page_count = ++i;
804 while (ptr < end) {
805 pb->pb_pages[i] = mem_to_page((void *)ptr);
806 pb->pb_page_count = ++i;
807 ptr += PAGE_CACHE_SIZE;
808 }
809 pb->pb_locked = 0;
810
811 pb->pb_count_desired = pb->pb_buffer_length = len;
812 pb->pb_flags |= PBF_MAPPED;
813
814 return 0;
815}
816
/*
 * Allocate a buffer backed by kmem_alloc'd memory with no disk address,
 * doubling the allocation size until the returned memory happens to be
 * sector-aligned for the target.
 */
xfs_buf_t *
pagebuf_get_no_daddr(
	size_t			len,
	xfs_buftarg_t		*target)
{
	size_t			malloc_len = len;
	xfs_buf_t		*bp;
	void			*data;
	int			error;

	bp = pagebuf_allocate(0);
	if (unlikely(bp == NULL))
		goto fail;
	_pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);

 try_again:
	data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
	if (unlikely(data == NULL))
		goto fail_free_buf;

	/* check whether alignment matches.. */
	if ((__psunsigned_t)data !=
	    ((__psunsigned_t)data & ~target->pbr_smask)) {
		/* .. else double the size and try again */
		kmem_free(data, malloc_len);
		malloc_len <<= 1;
		goto try_again;
	}

	error = pagebuf_associate_memory(bp, data, len);
	if (error)
		goto fail_free_mem;
	/* remember to kmem_free (not page_cache_release) in pagebuf_free */
	bp->pb_flags |= _PBF_KMEM_ALLOC;

	pagebuf_unlock(bp);

	PB_TRACE(bp, "no_daddr", data);
	return bp;
 fail_free_mem:
	kmem_free(data, malloc_len);
 fail_free_buf:
	pagebuf_free(bp);
 fail:
	return NULL;
}
862
863/*
864 * pagebuf_hold
865 *
866 * Increment reference count on buffer, to hold the buffer concurrently
867 * with another thread which may release (free) the buffer asynchronously.
868 *
869 * Must hold the buffer already to call this function.
870 */
void
pagebuf_hold(
	xfs_buf_t		*pb)
{
	/* see the comment above: caller must already hold a reference */
	atomic_inc(&pb->pb_hold);
	PB_TRACE(pb, "hold", 0);
}
878
879/*
880 * pagebuf_rele
881 *
882 * pagebuf_rele releases a hold on the specified buffer. If the
883 * the hold count is 1, pagebuf_rele calls pagebuf_free.
884 */
void
pagebuf_rele(
	xfs_buf_t		*pb)
{
	xfs_bufhash_t		*hash = pb->pb_hash;

	PB_TRACE(pb, "rele", pb->pb_relse);

	/*
	 * pagebuf_lookup buffers are not hashed, not delayed write,
	 * and don't have their own release routines. Special case.
	 */
	if (unlikely(!hash)) {
		ASSERT(!pb->pb_relse);
		if (atomic_dec_and_test(&pb->pb_hold))
			xfs_buf_free(pb);
		return;
	}

	/* only take the hash lock when dropping the last reference */
	if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
		int		do_free = 1;

		if (pb->pb_relse) {
			/* hand the final reference to the release callback */
			atomic_inc(&pb->pb_hold);
			spin_unlock(&hash->bh_lock);
			(*(pb->pb_relse)) (pb);
			spin_lock(&hash->bh_lock);
			do_free = 0;
		}

		if (pb->pb_flags & PBF_DELWRI) {
			/* keep the buffer alive on the delwri queue */
			pb->pb_flags |= PBF_ASYNC;
			atomic_inc(&pb->pb_hold);
			pagebuf_delwri_queue(pb, 0);
			do_free = 0;
		} else if (pb->pb_flags & PBF_FS_MANAGED) {
			/* filesystem-managed buffers are not freed here */
			do_free = 0;
		}

		if (do_free) {
			list_del_init(&pb->pb_hash_list);
			spin_unlock(&hash->bh_lock);
			pagebuf_free(pb);
		} else {
			spin_unlock(&hash->bh_lock);
		}
	}
}
933
934
935/*
936 * Mutual exclusion on buffers. Locking model:
937 *
938 * Buffers associated with inodes for which buffer locking
939 * is not enabled are not protected by semaphores, and are
940 * assumed to be exclusively owned by the caller. There is a
941 * spinlock in the buffer, used by the caller when concurrent
942 * access is possible.
943 */
944
945/*
946 * pagebuf_cond_lock
947 *
948 * pagebuf_cond_lock locks a buffer object, if it is not already locked.
949 * Note that this in no way
950 * locks the underlying pages, so it is only useful for synchronizing
951 * concurrent use of page buffer objects, not for synchronizing independent
952 * access to the underlying pages.
953 */
954int
955pagebuf_cond_lock( /* lock buffer, if not locked */
956 /* returns -EBUSY if locked) */
957 xfs_buf_t *pb)
958{
959 int locked;
960
961 locked = down_trylock(&pb->pb_sema) == 0;
962 if (locked) {
963 PB_SET_OWNER(pb);
964 }
965 PB_TRACE(pb, "cond_lock", (long)locked);
966 return(locked ? 0 : -EBUSY);
967}
968
#if defined(DEBUG) || defined(XFS_BLI_TRACE)
/*
 * pagebuf_lock_value
 *
 * Return lock value for a pagebuf
 * (the raw pb_sema count; debug/trace builds only).
 */
int
pagebuf_lock_value(
	xfs_buf_t		*pb)
{
	return(atomic_read(&pb->pb_sema.count));
}
#endif
982
983/*
984 * pagebuf_lock
985 *
986 * pagebuf_lock locks a buffer object. Note that this in no way
987 * locks the underlying pages, so it is only useful for synchronizing
988 * concurrent use of page buffer objects, not for synchronizing independent
989 * access to the underlying pages.
990 */
991int
992pagebuf_lock(
993 xfs_buf_t *pb)
994{
995 PB_TRACE(pb, "lock", 0);
996 if (atomic_read(&pb->pb_io_remaining))
997 blk_run_address_space(pb->pb_target->pbr_mapping);
998 down(&pb->pb_sema);
999 PB_SET_OWNER(pb);
1000 PB_TRACE(pb, "locked", 0);
1001 return 0;
1002}
1003
1004/*
1005 * pagebuf_unlock
1006 *
1007 * pagebuf_unlock releases the lock on the buffer object created by
1008 * pagebuf_lock or pagebuf_cond_lock (not any
1009 * pinning of underlying pages created by pagebuf_pin).
1010 */
void
pagebuf_unlock(			/* unlock buffer		*/
	xfs_buf_t		*pb)	/* buffer to unlock		*/
{
	/* clear the owner before the semaphore can be re-taken */
	PB_CLEAR_OWNER(pb);
	up(&pb->pb_sema);
	PB_TRACE(pb, "unlock", 0);
}
1019
1020
1021/*
1022 * Pinning Buffer Storage in Memory
1023 */
1024
1025/*
1026 * pagebuf_pin
1027 *
1028 * pagebuf_pin locks all of the memory represented by a buffer in
1029 * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
1030 * the same or different buffers affecting a given page, will
1031 * properly count the number of outstanding "pin" requests. The
1032 * buffer may be released after the pagebuf_pin and a different
1033 * buffer used when calling pagebuf_unpin, if desired.
1034 * pagebuf_pin should be used by the file system when it wants be
1035 * assured that no attempt will be made to force the affected
1036 * memory to disk. It does not assure that a given logical page
1037 * will not be moved to a different physical page.
1038 */
void
pagebuf_pin(
	xfs_buf_t		*pb)
{
	/* pin counts nest; see the comment block above for semantics */
	atomic_inc(&pb->pb_pin_count);
	PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
}
1046
1047/*
1048 * pagebuf_unpin
1049 *
1050 * pagebuf_unpin reverses the locking of memory performed by
1051 * pagebuf_pin. Note that both functions affected the logical
1052 * pages associated with the buffer, not the buffer itself.
1053 */
1054void
1055pagebuf_unpin(
1056 xfs_buf_t *pb)
1057{
1058 if (atomic_dec_and_test(&pb->pb_pin_count)) {
1059 wake_up_all(&pb->pb_waiters);
1060 }
1061 PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
1062}
1063
1064int
1065pagebuf_ispin(
1066 xfs_buf_t *pb)
1067{
1068 return atomic_read(&pb->pb_pin_count);
1069}
1070
1071/*
1072 * pagebuf_wait_unpin
1073 *
1074 * pagebuf_wait_unpin waits until all of the memory associated
 * with the buffer is no longer locked in memory. It returns
1076 * immediately if none of the affected pages are locked.
1077 */
static inline void
_pagebuf_wait_unpin(
	xfs_buf_t		*pb)
{
	DECLARE_WAITQUEUE	(wait, current);

	/* fast path: nothing pinned, nothing to wait for */
	if (atomic_read(&pb->pb_pin_count) == 0)
		return;

	add_wait_queue(&pb->pb_waiters, &wait);
	for (;;) {
		/* state must be set before the re-check to avoid a lost wakeup */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&pb->pb_pin_count) == 0)
			break;
		/* push queued I/O along; pagebuf_unpin() wakes pb_waiters */
		if (atomic_read(&pb->pb_io_remaining))
			blk_run_address_space(pb->pb_target->pbr_mapping);
		schedule();
	}
	remove_wait_queue(&pb->pb_waiters, &wait);
	set_current_state(TASK_RUNNING);
}
1099
1100/*
1101 * Buffer Utility Routines
1102 */
1103
1104/*
1105 * pagebuf_iodone
1106 *
1107 * pagebuf_iodone marks a buffer for which I/O is in progress
1108 * done with respect to that I/O. The pb_iodone routine, if
1109 * present, will be called as a side-effect.
1110 */
1111STATIC void
1112pagebuf_iodone_work(
1113 void *v)
1114{
1115 xfs_buf_t *bp = (xfs_buf_t *)v;
1116
1117 if (bp->pb_iodone)
1118 (*(bp->pb_iodone))(bp);
1119 else if (bp->pb_flags & PBF_ASYNC)
1120 xfs_buf_relse(bp);
1121}
1122
void
pagebuf_iodone(
	xfs_buf_t		*pb,
	int			dataio,
	int			schedule)
{
	/* the I/O is over - clear the direction flags */
	pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
	if (pb->pb_error == 0) {
		/* success means the whole buffer is now valid */
		pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
	}

	PB_TRACE(pb, "iodone", pb->pb_iodone);

	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
		if (schedule) {
			/* defer the callback to the matching workqueue */
			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
			queue_work(dataio ? pagebuf_dataio_workqueue :
				pagebuf_logio_workqueue, &pb->pb_iodone_work);
		} else {
			pagebuf_iodone_work(pb);
		}
	} else {
		/* wake any synchronous waiter blocked on pb_iodonesema */
		up(&pb->pb_iodonesema);
	}
}
1148
1149/*
1150 * pagebuf_ioerror
1151 *
1152 * pagebuf_ioerror sets the error code for a buffer.
1153 */
void
pagebuf_ioerror(			/* mark/clear buffer error flag */
	xfs_buf_t		*pb,	/* buffer to mark		*/
	int			error)	/* error to store (0 if none)	*/
{
	/* pb_error is an unsigned short; the error must fit */
	ASSERT(error >= 0 && error <= 0xffff);
	pb->pb_error = (unsigned short)error;
	PB_TRACE(pb, "ioerror", (unsigned long)error);
}
1163
1164/*
1165 * pagebuf_iostart
1166 *
1167 * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
1168 * If necessary, it will arrange for any disk space allocation required,
1169 * and it will break up the request if the block mappings require it.
1170 * The pb_iodone routine in the buffer supplied will only be called
1171 * when all of the subsidiary I/O requests, if any, have been completed.
1172 * pagebuf_iostart calls the pagebuf_ioinitiate routine or
1173 * pagebuf_iorequest, if the former routine is not defined, to start
1174 * the I/O on a given low-level request.
1175 */
int
pagebuf_iostart(			/* start I/O on a buffer	  */
	xfs_buf_t		*pb,	/* buffer to start		  */
	page_buf_flags_t	flags)	/* PBF_LOCK, PBF_ASYNC, PBF_READ, */
					/* PBF_WRITE, PBF_DELWRI,	  */
					/* PBF_DONT_BLOCK		  */
{
	int			status = 0;

	PB_TRACE(pb, "iostart", (unsigned long)flags);

	if (flags & PBF_DELWRI) {
		/* delayed write: queue only, no I/O issued here */
		pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
		pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
		pagebuf_delwri_queue(pb, 1);
		return status;
	}

	/* replace stale direction/queue flags with the caller's choices */
	pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);
	pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);

	BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);

	/* For writes allow an alternate strategy routine to precede
	 * the actual I/O request (which may not be issued at all in
	 * a shutdown situation, for example).
	 */
	status = (flags & PBF_WRITE) ?
		pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);

	/* Wait for I/O if we are not an async request.
	 * Note: async I/O request completion will release the buffer,
	 * and that can already be done by this point. So using the
	 * buffer pointer from here on, after async I/O, is invalid.
	 */
	if (!status && !(flags & PBF_ASYNC))
		status = pagebuf_iowait(pb);

	return status;
}
1218
1219/*
1220 * Helper routine for pagebuf_iorequest
1221 */
1222
1223STATIC __inline__ int
1224_pagebuf_iolocked(
1225 xfs_buf_t *pb)
1226{
1227 ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
1228 if (pb->pb_flags & PBF_READ)
1229 return pb->pb_locked;
1230 return 0;
1231}
1232
1233STATIC __inline__ void
1234_pagebuf_iodone(
1235 xfs_buf_t *pb,
1236 int schedule)
1237{
1238 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
1239 pb->pb_locked = 0;
1240 pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
1241 }
1242}
1243
/*
 * I/O completion handler for the bios built by _pagebuf_ioapply().
 *
 * Records any error on the buffer, propagates up-to-date / error state
 * to each page covered by the bio, unlocks pages that were locked for
 * the I/O, and drops one outstanding-I/O reference (which may trigger
 * final buffer completion processing).
 */
STATIC int
bio_end_io_pagebuf(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
	struct bio_vec		*bvec = bio->bi_io_vec;

	/* Partial completion: wait for the rest of the bio. */
	if (bio->bi_size)
		return 1;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		pb->pb_error = EIO;

	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
		struct page	*page = bvec->bv_page;

		if (pb->pb_error) {
			SetPageError(page);
		} else if (blocksize == PAGE_CACHE_SIZE) {
			SetPageUptodate(page);
		} else if (!PagePrivate(page) &&
				(pb->pb_flags & _PBF_PAGE_CACHE)) {
			/* sub-page blocks: mark only the I/O'd region valid */
			set_page_region(page, bvec->bv_offset, bvec->bv_len);
		}

		if (_pagebuf_iolocked(pb)) {
			unlock_page(page);
		}
	}

	_pagebuf_iodone(pb, 1);
	bio_put(bio);
	return 0;
}
1281
/*
 * Build and submit bios covering pb_count_desired bytes of the buffer,
 * starting at pb_offset within the first page.  One pb_io_remaining
 * reference is taken per submitted bio; bio_end_io_pagebuf() drops
 * them as the bios complete.
 */
STATIC void
_pagebuf_ioapply(
	xfs_buf_t		*pb)
{
	int			i, rw, map_i, total_nr_pages, nr_pages;
	struct bio		*bio;
	int			offset = pb->pb_offset;
	int			size = pb->pb_count_desired;
	sector_t		sector = pb->pb_bn;
	unsigned int		blocksize = pb->pb_target->pbr_bsize;
	int			locking = _pagebuf_iolocked(pb);

	total_nr_pages = pb->pb_page_count;
	map_i = 0;

	/* _PBF_RUN_QUEUES requests synchronous dispatch; consume it here. */
	if (pb->pb_flags & _PBF_RUN_QUEUES) {
		pb->pb_flags &= ~_PBF_RUN_QUEUES;
		rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
	} else {
		rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
	}

	/* Special code path for reading a sub page size pagebuf in --
	 * we populate up the whole page, and hence the other metadata
	 * in the same page.  This optimization is only valid when the
	 * filesystem block size and the page size are equal.
	 */
	if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
	    (pb->pb_flags & PBF_READ) && locking &&
	    (blocksize == PAGE_CACHE_SIZE)) {
		bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = pb->pb_target->pbr_bdev;
		/* back up to the start of the page, not of the range */
		bio->bi_sector = sector - (offset >> BBSHIFT);
		bio->bi_end_io = bio_end_io_pagebuf;
		bio->bi_private = pb;

		bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
		size = 0;

		atomic_inc(&pb->pb_io_remaining);

		goto submit_io;
	}

	/* Lock down the pages which we need to for the request */
	if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
		for (i = 0; size; i++) {
			int		nbytes = PAGE_CACHE_SIZE - offset;
			struct page	*page = pb->pb_pages[i];

			if (nbytes > size)
				nbytes = size;

			lock_page(page);

			size -= nbytes;
			offset = 0;
		}
		/* reset for the bio-building pass below */
		offset = pb->pb_offset;
		size = pb->pb_count_desired;
	}

next_chunk:
	/* one outstanding-I/O reference per bio submitted */
	atomic_inc(&pb->pb_io_remaining);
	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
	if (nr_pages > total_nr_pages)
		nr_pages = total_nr_pages;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = pb->pb_target->pbr_bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = bio_end_io_pagebuf;
	bio->bi_private = pb;

	for (; size && nr_pages; nr_pages--, map_i++) {
		int	nbytes = PAGE_CACHE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		/* bio is full: submit it and start another chunk */
		if (bio_add_page(bio, pb->pb_pages[map_i],
					nbytes, offset) < nbytes)
			break;

		offset = 0;
		sector += nbytes >> BBSHIFT;
		size -= nbytes;
		total_nr_pages--;
	}

submit_io:
	if (likely(bio->bi_size)) {
		submit_bio(rw, bio);
		if (size)
			goto next_chunk;
	} else {
		/* bio_add_page() accepted nothing at all */
		bio_put(bio);
		pagebuf_ioerror(pb, EIO);
		/* NOTE(review): the pb_io_remaining reference taken for
		 * this bio is not dropped on this path -- confirm that
		 * completion still fires for the buffer. */
	}
}
1383
/*
 * pagebuf_iorequest -- the core I/O request routine.
 *
 * Submits the actual device I/O for a buffer (or queues it, for
 * delayed writes).  Always returns 0; I/O errors are reported via
 * pb_error and the completion path.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	/* Delayed-write buffers are only queued, never issued here. */
	if (pb->pb_flags & PBF_DELWRI) {
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	/* Writes must wait until the buffer is no longer pinned. */
	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	/* Hold a reference so completion cannot free the buffer while
	 * we are still submitting bios for it. */
	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	/* Drop the artificial initial reference; completion runs now
	 * if every bio has already finished. */
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}
1415
/*
 * pagebuf_iowait
 *
 * pagebuf_iowait waits for I/O to complete on the buffer supplied.
 * It returns immediately if no I/O is pending.  In any case, it returns
 * the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	/* Kick the request queue so pending I/O is actually dispatched
	 * before we sleep waiting for it. */
	if (atomic_read(&pb->pb_io_remaining))
		blk_run_address_space(pb->pb_target->pbr_mapping);
	down(&pb->pb_iodonesema);	/* released on I/O completion */
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}
1434
1435caddr_t
1436pagebuf_offset(
1437 xfs_buf_t *pb,
1438 size_t offset)
1439{
1440 struct page *page;
1441
1442 offset += pb->pb_offset;
1443
1444 page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
1445 return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
1446}
1447
1448/*
1449 * pagebuf_iomove
1450 *
1451 * Move data into or out of a buffer.
1452 */
1453void
1454pagebuf_iomove(
1455 xfs_buf_t *pb, /* buffer to process */
1456 size_t boff, /* starting buffer offset */
1457 size_t bsize, /* length to copy */
1458 caddr_t data, /* data address */
1459 page_buf_rw_t mode) /* read/write flag */
1460{
1461 size_t bend, cpoff, csize;
1462 struct page *page;
1463
1464 bend = boff + bsize;
1465 while (boff < bend) {
1466 page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
1467 cpoff = page_buf_poff(boff + pb->pb_offset);
1468 csize = min_t(size_t,
1469 PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
1470
1471 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
1472
1473 switch (mode) {
1474 case PBRW_ZERO:
1475 memset(page_address(page) + cpoff, 0, csize);
1476 break;
1477 case PBRW_READ:
1478 memcpy(data, page_address(page) + cpoff, csize);
1479 break;
1480 case PBRW_WRITE:
1481 memcpy(page_address(page) + cpoff, data, csize);
1482 }
1483
1484 boff += csize;
1485 data += csize;
1486 }
1487}
1488
1489/*
1490 * Handling of buftargs.
1491 */
1492
/*
 * Wait for any bufs with callbacks that have been submitted but
 * have not yet returned... walk the hash list for the target.
 *
 * A buffer without PBF_FS_MANAGED is still outstanding; since we
 * cannot sleep holding the bucket lock, drop it, wait briefly, and
 * rescan that bucket from scratch until only managed buffers remain.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t	*btp)
{
	xfs_buf_t	*bp, *n;
	xfs_bufhash_t	*hash;
	uint		i;

	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		hash = &btp->bt_hash[i];
again:
		spin_lock(&hash->bh_lock);
		list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
			ASSERT(btp == bp->pb_target);
			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
				spin_unlock(&hash->bh_lock);
				delay(100);
				goto again;
			}
		}
		spin_unlock(&hash->bh_lock);
	}
}
1520
1521/*
1522 * Allocate buffer hash table for a given target.
1523 * For devices containing metadata (i.e. not the log/realtime devices)
1524 * we need to allocate a much larger hash table.
1525 */
1526STATIC void
1527xfs_alloc_bufhash(
1528 xfs_buftarg_t *btp,
1529 int external)
1530{
1531 unsigned int i;
1532
1533 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1534 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1535 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
1536 sizeof(xfs_bufhash_t), KM_SLEEP);
1537 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1538 spin_lock_init(&btp->bt_hash[i].bh_lock);
1539 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
1540 }
1541}
1542
1543STATIC void
1544xfs_free_bufhash(
1545 xfs_buftarg_t *btp)
1546{
1547 kmem_free(btp->bt_hash,
1548 (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1549 btp->bt_hash = NULL;
1550}
1551
/*
 * Tear down a buffer target: flush out all of its delayed-write
 * buffers (waiting for them), release the block device reference for
 * external (log/realtime) devices, free the hash table, drop the
 * private mapping inode and finally free the structure itself.
 */
void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->pbr_bdev);
	xfs_free_bufhash(btp);
	iput(btp->pbr_mapping->host);	/* the inode from xfs_mapping_buftarg */
	kmem_free(btp, sizeof(*btp));
}
1564
/*
 * Invalidate the block device's cached data and truncate the target's
 * private metadata mapping.
 *
 * NOTE(review): the delwri_only and wait arguments are accepted for
 * interface compatibility but are ignored by this implementation.
 */
void
xfs_incore_relse(
	xfs_buftarg_t		*btp,
	int			delwri_only,
	int			wait)
{
	invalidate_bdev(btp->pbr_bdev, 1);
	truncate_inode_pages(btp->pbr_mapping, 0LL);
}
1574
1575STATIC int
1576xfs_setsize_buftarg_flags(
1577 xfs_buftarg_t *btp,
1578 unsigned int blocksize,
1579 unsigned int sectorsize,
1580 int verbose)
1581{
1582 btp->pbr_bsize = blocksize;
1583 btp->pbr_sshift = ffs(sectorsize) - 1;
1584 btp->pbr_smask = sectorsize - 1;
1585
1586 if (set_blocksize(btp->pbr_bdev, sectorsize)) {
1587 printk(KERN_WARNING
1588 "XFS: Cannot set_blocksize to %u on device %s\n",
1589 sectorsize, XFS_BUFTARG_NAME(btp));
1590 return EINVAL;
1591 }
1592
1593 if (verbose &&
1594 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1595 printk(KERN_WARNING
1596 "XFS: %u byte sectors in use on device %s. "
1597 "This is suboptimal; %u or greater is ideal.\n",
1598 sectorsize, XFS_BUFTARG_NAME(btp),
1599 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1600 }
1601
1602 return 0;
1603}
1604
1605/*
1606* When allocating the initial buffer target we have not yet
1607* read in the superblock, so don't know what sized sectors
1608* are being used is at this early stage. Play safe.
1609*/
1610STATIC int
1611xfs_setsize_buftarg_early(
1612 xfs_buftarg_t *btp,
1613 struct block_device *bdev)
1614{
1615 return xfs_setsize_buftarg_flags(btp,
1616 PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
1617}
1618
/*
 * Set the target's geometry once the real block/sector sizes are known
 * from the superblock; warns (verbose mode) about suboptimal sector
 * sizes.  Returns 0 or EINVAL, as xfs_setsize_buftarg_flags() does.
 */
int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
}
1627
/*
 * Set up the private address space used to stage metadata for this
 * target: a fresh anonymous block-device inode whose mapping carries
 * our own address_space_operations (sync_page only) and the device's
 * backing_dev_info.  GFP_NOFS keeps page allocations in this mapping
 * from recursing back into the filesystem.
 *
 * Returns 0 on success or ENOMEM (positive, XFS convention) if the
 * inode cannot be allocated.
 */
STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	struct backing_dev_info	*bdi;
	struct inode		*inode;
	struct address_space	*mapping;
	static struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
	};

	inode = new_inode(bdev->bd_inode->i_sb);
	if (!inode) {
		printk(KERN_WARNING
			"XFS: Cannot allocate mapping inode for device %s\n",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}
	inode->i_mode = S_IFBLK;
	inode->i_bdev = bdev;
	inode->i_rdev = bdev->bd_dev;
	bdi = blk_get_backing_dev_info(bdev);
	if (!bdi)
		bdi = &default_backing_dev_info;
	mapping = &inode->i_data;
	mapping->a_ops = &mapping_aops;
	mapping->backing_dev_info = bdi;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	btp->pbr_mapping = mapping;
	return 0;
}
1660
/*
 * Allocate and initialise a buffer target for the given block device.
 * Geometry starts out conservative (see xfs_setsize_buftarg_early())
 * until the superblock has been read.  Returns NULL on failure, with
 * partial setup undone.
 */
xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev,
	int			external)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	btp->pbr_dev =  bdev->bd_dev;
	btp->pbr_bdev = bdev;
	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;
	if (xfs_mapping_buftarg(btp, bdev))
		goto error;
	xfs_alloc_bufhash(btp, external);	/* KM_SLEEP: cannot fail */
	return btp;

error:
	kmem_free(btp, sizeof(*btp));
	return NULL;
}
1683
1684
1685/*
1686 * Pagebuf delayed write buffer handling
1687 */
1688
/* Global delayed-write queue and the spinlock protecting it. */
STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1691
/*
 * Add a delayed-write buffer to the tail of the global delwri queue,
 * stamping the time it was queued so the flush daemon can age it.
 * With 'unlock' set, the buffer's lock is dropped after queueing.
 */
STATIC void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT(pb->pb_flags & PBF_DELWRI);

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		/* NOTE(review): this appears to drop the extra hold the
		 * caller took for this (re)queue so the queue keeps
		 * exactly one reference -- confirm against callers. */
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}

	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;	/* age basis for the flush daemon */
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}
1716
/*
 * Remove a buffer from the delayed-write queue (if it is on it) and
 * clear PBF_DELWRI.  The queue's reference is dropped only after the
 * lock has been released.
 */
void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~PBF_DELWRI;
	spin_unlock(&pbd_delwrite_lock);

	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}
1736
/*
 * Wait until every work item currently queued on the given I/O
 * completion workqueue has run to completion.
 */
STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}
1743
/* Defines for pagebuf daemon */
STATIC DECLARE_COMPLETION(pagebuf_daemon_done);	/* signalled on daemon exit */
STATIC struct task_struct *pagebuf_daemon_task;	/* the xfsbufd thread */
STATIC int pagebuf_daemon_active;	/* cleared to ask the daemon to stop */
STATIC int force_flush;			/* flush everything, ignoring age */
1749
1750
/*
 * Memory-pressure (shaker) callback: ask the flush daemon to write out
 * all delayed-write buffers immediately, regardless of age.  The
 * barrier ensures the flag store is emitted before the wakeup.
 */
STATIC int
pagebuf_daemon_wakeup(
	int			priority,
	unsigned int		mask)
{
	force_flush = 1;
	barrier();
	wake_up_process(pagebuf_daemon_task);
	return 0;
}
1761
/*
 * Delayed-write flush daemon (xfsbufd).
 *
 * Wakes periodically, moves aged delayed-write buffers -- or, when
 * force_flush is set by the memory-pressure hook, all of them -- off
 * the global delwri queue onto a private list, then issues their
 * write I/O outside the queue lock.
 */
STATIC int
pagebuf_daemon(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;

	/* Set up the thread */
	daemonize("xfsbufd");
	current->flags |= PF_MEMALLOC;	/* we run to free memory */

	pagebuf_daemon_task = current;
	pagebuf_daemon_active = 1;
	barrier();

	INIT_LIST_HEAD(&tmp);
	do {
		try_to_freeze(PF_FREEZE);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);

		age = (xfs_buf_age_centisecs * HZ) / 100;
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			/* skip pinned buffers and ones we cannot lock
			 * without blocking */
			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				if (!force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					/* queue is in queue-time order,
					 * so the rest are younger still */
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~PBF_DELWRI;
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		/* issue the writes outside the delwri lock */
		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			target = pb->pb_target;

			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);

			blk_run_address_space(target->pbr_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		force_flush = 0;
	} while (pagebuf_daemon_active);

	complete_and_exit(&pagebuf_daemon_done, 0);
}
1825
/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 *
 * Writes out every delayed-write buffer queued for 'target',
 * optionally ('wait') synchronously.  Returns the number of pinned
 * buffers that had to be skipped.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t	*target,
	int		wait)
{
	struct list_head tmp;
	xfs_buf_t	*pb, *n;
	int		pincount = 0;

	/* Let queued I/O completion work finish before we scan. */
	pagebuf_runall_queues(pagebuf_dataio_workqueue);
	pagebuf_runall_queues(pagebuf_logio_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & PBF_DELWRI);
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			/* pinned by the log; cannot be written yet */
			pincount++;
			continue;
		}

		pb->pb_flags &= ~PBF_DELWRI;
		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		/* sync flush: keep the buffer on tmp so we can wait for
		 * it below; async: it is on its own once issued */
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	if (wait)
		blk_run_address_space(target->pbr_mapping);

	return pincount;
}
1892
1893STATIC int
1894pagebuf_daemon_start(void)
1895{
1896 int rval;
1897
1898 pagebuf_logio_workqueue = create_workqueue("xfslogd");
1899 if (!pagebuf_logio_workqueue)
1900 return -ENOMEM;
1901
1902 pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
1903 if (!pagebuf_dataio_workqueue) {
1904 destroy_workqueue(pagebuf_logio_workqueue);
1905 return -ENOMEM;
1906 }
1907
1908 rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
1909 if (rval < 0) {
1910 destroy_workqueue(pagebuf_logio_workqueue);
1911 destroy_workqueue(pagebuf_dataio_workqueue);
1912 }
1913
1914 return rval;
1915}
1916
/*
 * pagebuf_daemon_stop
 *
 * Note: do not mark as __exit, it is called from pagebuf_terminate.
 */
STATIC void
pagebuf_daemon_stop(void)
{
	/* Ask the daemon to exit and block until it has done so. */
	pagebuf_daemon_active = 0;
	barrier();
	wait_for_completion(&pagebuf_daemon_done);

	destroy_workqueue(pagebuf_logio_workqueue);
	destroy_workqueue(pagebuf_dataio_workqueue);
}
1932
1933/*
1934 * Initialization and Termination
1935 */
1936
1937int __init
1938pagebuf_init(void)
1939{
1940 pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
1941 SLAB_HWCACHE_ALIGN, NULL, NULL);
1942 if (pagebuf_cache == NULL) {
1943 printk("XFS: couldn't init xfs_buf_t cache\n");
1944 pagebuf_terminate();
1945 return -ENOMEM;
1946 }
1947
1948#ifdef PAGEBUF_TRACE
1949 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
1950#endif
1951
1952 pagebuf_daemon_start();
1953
1954 pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
1955 if (pagebuf_shake == NULL) {
1956 pagebuf_terminate();
1957 return -ENOMEM;
1958 }
1959
1960 return 0;
1961}
1962
1963
/*
 * pagebuf_terminate.
 *
 * Tears down everything pagebuf_init() set up, in reverse order.
 *
 * Note: do not mark as __exit, this is also called from the __init code.
 */
void
pagebuf_terminate(void)
{
	pagebuf_daemon_stop();	/* blocks until the daemon has exited */

#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif

	kmem_zone_destroy(pagebuf_cache);
	kmem_shake_deregister(pagebuf_shake);
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
new file mode 100644
index 000000000000..74deed8e6d90
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -0,0 +1,591 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
35 */
36
37#ifndef __XFS_BUF_H__
38#define __XFS_BUF_H__
39
40#include <linux/config.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/spinlock.h>
44#include <asm/system.h>
45#include <linux/mm.h>
46#include <linux/fs.h>
47#include <linux/buffer_head.h>
48#include <linux/uio.h>
49
50/*
51 * Base types
52 */
53
/* Sentinel disk address: buffer has no block number assigned yet. */
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))

/* Conversions between page-cache pages and byte counts/offsets. */
#define page_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)	/* pages to bytes */
#define page_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) /* bytes to pages, rounded up */
#define page_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)	/* bytes to pages, truncated */
#define page_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)	/* offset within a page */

/* Direction of a pagebuf_iomove() data transfer. */
typedef enum page_buf_rw_e {
	PBRW_READ = 1,			/* transfer into target memory	*/
	PBRW_WRITE = 2,			/* transfer from target memory	*/
	PBRW_ZERO = 3			/* Zero target memory		*/
} page_buf_rw_t;
66
67
/*
 * Buffer state and request flags.  The first group lives in pb_flags
 * for the lifetime of the buffer; the later groups are accepted only
 * as call arguments or are for internal bookkeeping.
 */
typedef enum page_buf_flags_e {		/* pb_flags values */
	PBF_READ = (1 << 0),	/* buffer intended for reading from device */
	PBF_WRITE = (1 << 1),	/* buffer intended for writing to device   */
	PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
	PBF_PARTIAL = (1 << 3), /* buffer partially read                   */
	PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
	PBF_NONE = (1 << 5),    /* buffer not read at all                  */
	PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
	PBF_STALE = (1 << 7),	/* buffer has been staled, do not find it  */
	PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory   */
	PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad   */
	PBF_FORCEIO = (1 << 10),   /* ignore any cache state               */
	PBF_FLUSH = (1 << 11),	   /* flush disk write cache               */
	PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */

	/* flags used only as arguments to access routines */
	PBF_LOCK = (1 << 14),       /* lock requested                      */
	PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
	PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */

	/* flags used only internally */
	_PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
	_PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
	_PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
} page_buf_flags_t;

/* Buffer is valid for both reading and writing. */
#define PBF_UPDATE (PBF_READ | PBF_WRITE)
/* "done" means neither partially read nor completely unread. */
#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
97
/* One buffer hash bucket: the chain of buffers plus its guarding lock. */
typedef struct xfs_bufhash {
	struct list_head	bh_list;
	spinlock_t		bh_lock;
} xfs_bufhash_t;

/* Per-device buffer cache state (a "buffer target"). */
typedef struct xfs_buftarg {
	dev_t			pbr_dev;	/* device number */
	struct block_device	*pbr_bdev;	/* underlying block device */
	struct address_space	*pbr_mapping;	/* private metadata mapping */
	unsigned int		pbr_bsize;	/* filesystem block size */
	unsigned int		pbr_sshift;	/* log2 of sector size */
	size_t			pbr_smask;	/* sector size - 1 */

	/* per-device buffer hash table */
	uint			bt_hashmask;	/* (1 << bt_hashshift) - 1 */
	uint			bt_hashshift;	/* log2 of bucket count */
	xfs_bufhash_t		*bt_hash;	/* bucket array */
} xfs_buftarg_t;
116
117/*
118 * xfs_buf_t: Buffer structure for page cache-based buffers
119 *
120 * This buffer structure is used by the page cache buffer management routines
121 * to refer to an assembly of pages forming a logical buffer. The actual I/O
122 * is performed with buffer_head structures, as required by drivers.
123 *
124 * The buffer structure is used on temporary basis only, and discarded when
125 * released. The real data storage is recorded in the page cache. Metadata is
126 * hashed to the block device on which the file system resides.
127 */
128
struct xfs_buf;

/* call-back function on I/O completion */
typedef void (*page_buf_iodone_t)(struct xfs_buf *);
/* call-back function on final buffer release */
typedef void (*page_buf_relse_t)(struct xfs_buf *);
/* pre-write function */
typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);

/* page pointers held inline before pb_pages must be heap-allocated */
#define PB_PAGES	2

typedef struct xfs_buf {
	struct semaphore	pb_sema;	/* semaphore for lockables */
	unsigned long		pb_queuetime;	/* time buffer was queued */
	atomic_t		pb_pin_count;	/* pin count */
	wait_queue_head_t	pb_waiters;	/* unpin waiters */
	struct list_head	pb_list;	/* delwri queue linkage */
	page_buf_flags_t	pb_flags;	/* status flags */
	struct list_head	pb_hash_list;	/* hash table list */
	xfs_bufhash_t		*pb_hash;	/* hash table list start */
	xfs_buftarg_t		*pb_target;	/* buffer target (device) */
	atomic_t		pb_hold;	/* reference count */
	xfs_daddr_t		pb_bn;		/* block number for I/O */
	loff_t			pb_file_offset;	/* offset in file */
	size_t			pb_buffer_length; /* size of buffer in bytes */
	size_t			pb_count_desired; /* desired transfer size */
	void			*pb_addr;	/* virtual address of buffer */
	struct work_struct	pb_iodone_work;	/* deferred completion work */
	atomic_t		pb_io_remaining;/* #outstanding I/O requests */
	page_buf_iodone_t	pb_iodone;	/* I/O completion function */
	page_buf_relse_t	pb_relse;	/* releasing function */
	page_buf_bdstrat_t	pb_strat;	/* pre-write function */
	struct semaphore	pb_iodonesema;	/* Semaphore for I/O waiters */
	void			*pb_fspriv;	/* filesystem private data */
	void			*pb_fspriv2;	/* filesystem private data */
	void			*pb_fspriv3;	/* filesystem private data */
	unsigned short		pb_error;	/* error code on I/O */
	unsigned short		pb_locked;	/* page array is locked */
	unsigned int		pb_page_count;	/* size of page array */
	unsigned int		pb_offset;	/* page offset in first page */
	struct page		**pb_pages;	/* array of page pointers */
	struct page		*pb_page_array[PB_PAGES]; /* inline pages */
#ifdef PAGEBUF_LOCK_TRACKING
	int			pb_last_holder;	/* last lock owner, for debug */
#endif
} xfs_buf_t;
175
176
177/* Finding and Reading Buffers */
178
179extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */
180 /* the block is in memory */
181 xfs_buftarg_t *, /* inode for block */
182 loff_t, /* starting offset of range */
183 size_t, /* length of range */
184 page_buf_flags_t, /* PBF_LOCK */
185 xfs_buf_t *); /* newly allocated buffer */
186
187#define xfs_incore(buftarg,blkno,len,lockit) \
188 _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
189
190extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
191 xfs_buftarg_t *, /* inode for buffer */
192 loff_t, /* starting offset of range */
193 size_t, /* length of range */
194 page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
195 /* PBF_ASYNC */
196
197#define xfs_buf_get(target, blkno, len, flags) \
198 xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
199
200extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
201 xfs_buftarg_t *, /* inode for buffer */
202 loff_t, /* starting offset of range */
203 size_t, /* length of range */
204 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
205
206#define xfs_buf_read(target, blkno, len, flags) \
207 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
208
209extern xfs_buf_t *pagebuf_lookup(
210 xfs_buftarg_t *,
211 loff_t, /* starting offset of range */
212 size_t, /* length of range */
213 page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
214 /* PBF_FORCEIO, */
215
216extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
217 /* no memory or disk address */
218 size_t len,
219 xfs_buftarg_t *); /* mount point "fake" inode */
220
221extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
222 /* without disk address */
223 size_t len,
224 xfs_buftarg_t *); /* mount point "fake" inode */
225
226extern int pagebuf_associate_memory(
227 xfs_buf_t *,
228 void *,
229 size_t);
230
231extern void pagebuf_hold( /* increment reference count */
232 xfs_buf_t *); /* buffer to hold */
233
234extern void pagebuf_readahead( /* read ahead into cache */
235 xfs_buftarg_t *, /* target for buffer (or NULL) */
236 loff_t, /* starting offset of range */
237 size_t, /* length of range */
238 page_buf_flags_t); /* additional read flags */
239
240/* Releasing Buffers */
241
242extern void pagebuf_free( /* deallocate a buffer */
243 xfs_buf_t *); /* buffer to deallocate */
244
245extern void pagebuf_rele( /* release hold on a buffer */
246 xfs_buf_t *); /* buffer to release */
247
248/* Locking and Unlocking Buffers */
249
250extern int pagebuf_cond_lock( /* lock buffer, if not locked */
251 /* (returns -EBUSY if locked) */
252 xfs_buf_t *); /* buffer to lock */
253
254extern int pagebuf_lock_value( /* return count on lock */
255 xfs_buf_t *); /* buffer to check */
256
257extern int pagebuf_lock( /* lock buffer */
258 xfs_buf_t *); /* buffer to lock */
259
260extern void pagebuf_unlock( /* unlock buffer */
261 xfs_buf_t *); /* buffer to unlock */
262
263/* Buffer Read and Write Routines */
264
265extern void pagebuf_iodone( /* mark buffer I/O complete */
266 xfs_buf_t *, /* buffer to mark */
267 int, /* use data/log helper thread. */
268 int); /* run completion locally, or in
269 * a helper thread. */
270
271extern void pagebuf_ioerror( /* mark buffer in error (or not) */
272 xfs_buf_t *, /* buffer to mark */
273 int); /* error to store (0 if none) */
274
275extern int pagebuf_iostart( /* start I/O on a buffer */
276 xfs_buf_t *, /* buffer to start */
277 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
278 /* PBF_READ, PBF_WRITE, */
279 /* PBF_DELWRI */
280
281extern int pagebuf_iorequest( /* start real I/O */
282 xfs_buf_t *); /* buffer to convey to device */
283
284extern int pagebuf_iowait( /* wait for buffer I/O done */
285 xfs_buf_t *); /* buffer to wait on */
286
287extern void pagebuf_iomove( /* move data in/out of pagebuf */
288 xfs_buf_t *, /* buffer to manipulate */
289 size_t, /* starting buffer offset */
290 size_t, /* length in buffer */
291 caddr_t, /* data pointer */
292 page_buf_rw_t); /* direction */
293
294static inline int pagebuf_iostrategy(xfs_buf_t *pb)
295{
296 return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
297}
298
299static inline int pagebuf_geterror(xfs_buf_t *pb)
300{
301 return pb ? pb->pb_error : ENOMEM;
302}
303
304/* Buffer Utility Routines */
305
306extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
307 xfs_buf_t *, /* buffer to offset into */
308 size_t); /* offset */
309
310/* Pinning Buffer Storage in Memory */
311
312extern void pagebuf_pin( /* pin buffer in memory */
313 xfs_buf_t *); /* buffer to pin */
314
315extern void pagebuf_unpin( /* unpin buffered data */
316 xfs_buf_t *); /* buffer to unpin */
317
318extern int pagebuf_ispin( /* check if buffer is pinned */
319 xfs_buf_t *); /* buffer to check */
320
321/* Delayed Write Buffer Routines */
322
323extern void pagebuf_delwri_dequeue(xfs_buf_t *);
324
325/* Buffer Daemon Setup Routines */
326
327extern int pagebuf_init(void);
328extern void pagebuf_terminate(void);
329
330
331#ifdef PAGEBUF_TRACE
332extern ktrace_t *pagebuf_trace_buf;
333extern void pagebuf_trace(
334 xfs_buf_t *, /* buffer being traced */
335 char *, /* description of operation */
336 void *, /* arbitrary diagnostic value */
337 void *); /* return address */
338#else
339# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
340#endif
341
342#define pagebuf_target_name(target) \
343 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
344
345
346
347
348
349/* These are just for xfs_syncsub... it sets an internal variable
350 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
351 */
352#define XFS_B_ASYNC PBF_ASYNC
353#define XFS_B_DELWRI PBF_DELWRI
354#define XFS_B_READ PBF_READ
355#define XFS_B_WRITE PBF_WRITE
356#define XFS_B_STALE PBF_STALE
357
358#define XFS_BUF_TRYLOCK PBF_TRYLOCK
359#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
360#define XFS_BUF_LOCK PBF_LOCK
361#define XFS_BUF_MAPPED PBF_MAPPED
362
363#define BUF_BUSY PBF_DONT_BLOCK
364
365#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
366#define XFS_BUF_ZEROFLAGS(x) \
367 ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
368
/* Stale buffers carry no valid data and must not be written back. */
#define XFS_BUF_STALE(x)	 ((x)->pb_flags |= XFS_B_STALE)
#define XFS_BUF_UNSTALE(x)	 ((x)->pb_flags &= ~XFS_B_STALE)
#define XFS_BUF_ISSTALE(x)	 ((x)->pb_flags & XFS_B_STALE)
/* Mark a buffer stale, pull it off the delayed-write queue, and mark
 * the (now irrelevant) I/O as done so that waiters are not left hanging. */
#define XFS_BUF_SUPER_STALE(x)	do {				\
					XFS_BUF_STALE(x);	\
					pagebuf_delwri_dequeue(x);	\
					XFS_BUF_DONE(x);	\
				} while (0)
377
378#define XFS_BUF_MANAGE PBF_FS_MANAGED
379#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)
380
381#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)
382#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)
383#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)
384
385#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)
386#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)
387#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)
388
389#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE))
390#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE)
391#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x)))
392
393#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO)
394#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO)
395#define XFS_BUF_ISBUSY(x) (1)
396
397#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
398#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
399#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
400
401#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH)
402#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH)
403#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH)
404
405#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
406#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
407#define XFS_BUF_ISSHUT(x) (0)
408
409#define XFS_BUF_HOLD(x) pagebuf_hold(x)
410#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
411#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
412#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
413
414#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
415#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
416#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
417
418#define XFS_BUF_ISUNINITIAL(x) (0)
419#define XFS_BUF_UNUNINITIAL(x) (0)
420
421#define XFS_BUF_BP_ISMAPPED(bp) 1
422
423#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD)
424#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD)
425
426#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
427#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
428 (buf)->pb_iodone = (func)
429#define XFS_BUF_CLR_IODONE_FUNC(buf) \
430 (buf)->pb_iodone = NULL
431#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
432 (buf)->pb_strat = (func)
433#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
434 (buf)->pb_strat = NULL
435
436#define XFS_BUF_FSPRIVATE(buf, type) \
437 ((type)(buf)->pb_fspriv)
438#define XFS_BUF_SET_FSPRIVATE(buf, value) \
439 (buf)->pb_fspriv = (void *)(value)
440#define XFS_BUF_FSPRIVATE2(buf, type) \
441 ((type)(buf)->pb_fspriv2)
442#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
443 (buf)->pb_fspriv2 = (void *)(value)
444#define XFS_BUF_FSPRIVATE3(buf, type) \
445 ((type)(buf)->pb_fspriv3)
446#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
447 (buf)->pb_fspriv3 = (void *)(value)
448#define XFS_BUF_SET_START(buf)
449
450#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
451 (buf)->pb_relse = (value)
452
453#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
454
/*
 * Return a kernel pointer to byte 'offset' within the buffer's data.
 * Fast path: when the buffer's pages are virtually mapped (PBF_MAPPED)
 * this is plain pointer arithmetic; otherwise fall back to
 * pagebuf_offset(), which locates the byte within the page array.
 */
extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
{
	if (bp->pb_flags & PBF_MAPPED)
		return XFS_BUF_PTR(bp) + offset;
	return (xfs_caddr_t) pagebuf_offset(bp, offset);
}
461
462#define XFS_BUF_SET_PTR(bp, val, count) \
463 pagebuf_associate_memory(bp, val, count)
464#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
465#define XFS_BUF_SET_ADDR(bp, blk) \
466 ((bp)->pb_bn = (xfs_daddr_t)(blk))
467#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
468#define XFS_BUF_SET_OFFSET(bp, off) \
469 ((bp)->pb_file_offset = (off))
470#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)
471#define XFS_BUF_SET_COUNT(bp, cnt) \
472 ((bp)->pb_count_desired = (cnt))
473#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)
474#define XFS_BUF_SET_SIZE(bp, cnt) \
475 ((bp)->pb_buffer_length = (cnt))
476#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
477#define XFS_BUF_SET_VTYPE(bp, type)
478#define XFS_BUF_SET_REF(bp, ref)
479
#define XFS_BUF_ISPINNED(bp)	 pagebuf_ispin(bp)

/* The buffer lock is modelled as a semaphore: V = unlock, P = lock,
 * CP = conditional (try) lock, VALUSEMA = current lock value. */
#define XFS_BUF_VALUSEMA(bp)	 pagebuf_lock_value(bp)
#define XFS_BUF_CPSEMA(bp)	 (pagebuf_cond_lock(bp) == 0)
#define XFS_BUF_VSEMA(bp)	 pagebuf_unlock(bp)
#define XFS_BUF_PSEMA(bp,x)	 pagebuf_lock(bp)
/* Wake waiters sleeping on this buffer's iodone semaphore.  The macro
 * argument is parenthesized and the trailing semicolon removed so the
 * macro expands like a normal expression statement (a trailing ';'
 * inside the definition breaks use in unbraced if/else bodies). */
#define XFS_BUF_V_IODONESEMA(bp) up(&(bp)->pb_iodonesema)
487
488/* setup the buffer target from a buftarg structure */
489#define XFS_BUF_SET_TARGET(bp, target) \
490 (bp)->pb_target = (target)
491#define XFS_BUF_TARGET(bp) ((bp)->pb_target)
492#define XFS_BUFTARG_NAME(target) \
493 pagebuf_target_name(target)
494
/* XFS_BUF_SET_VTYPE_REF, XFS_BUF_SET_VTYPE and XFS_BUF_SET_REF are
 * already defined (as no-ops) earlier in this header; the identical
 * duplicate definitions that used to sit here have been removed. */
498
/*
 * Issue an asynchronous write of a buffer right now.  The mount point
 * is stashed in pb_fspriv3 and xfs_bdstrat_cb installed as the strategy
 * routine so that filesystem shutdown state is checked at I/O time.
 */
static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
{
	bp->pb_fspriv3 = mp;
	bp->pb_strat = xfs_bdstrat_cb;
	pagebuf_delwri_dequeue(bp);	/* writing now, not via the delwri daemon */
	return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
}
506
/*
 * Release a buffer: drop the buffer lock (unless a private release
 * routine is installed, which then owns the unlock) and drop our
 * reference.
 */
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
	if (!bp->pb_relse)
		pagebuf_unlock(bp);
	pagebuf_rele(bp);
}
513
514#define xfs_bpin(bp) pagebuf_pin(bp)
515#define xfs_bunpin(bp) pagebuf_unpin(bp)
516
517#define xfs_buftrace(id, bp) \
518 pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
519
520#define xfs_biodone(pb) \
521 pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0)
522
523#define xfs_biomove(pb, off, len, data, rw) \
524 pagebuf_iomove((pb), (off), (len), (data), \
525 ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
526
527#define xfs_biozero(pb, off, len) \
528 pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
529
530
/*
 * Write a buffer, synchronously unless PBF_ASYNC is already set.
 * A synchronous caller waits for I/O completion and then releases the
 * buffer; an asynchronous caller just queues the I/O (_PBF_RUN_QUEUES
 * kicks the I/O daemons) and returns immediately.
 */
static inline int XFS_bwrite(xfs_buf_t *pb)
{
	int iowait = (pb->pb_flags & PBF_ASYNC) == 0;
	int error = 0;

	if (!iowait)
		pb->pb_flags |= _PBF_RUN_QUEUES;

	pagebuf_delwri_dequeue(pb);	/* we are writing it now, not later */
	pagebuf_iostrategy(pb);
	if (iowait) {
		error = pagebuf_iowait(pb);
		xfs_buf_relse(pb);
	}
	return error;
}
547
548#define XFS_bdwrite(pb) \
549 pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
550
/*
 * Queue a buffer for delayed write.  As in xfs_bawrite(), the mount
 * point and the shutdown-aware strategy routine are attached first.
 */
static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
{
	bp->pb_strat = xfs_bdstrat_cb;
	bp->pb_fspriv3 = mp;

	return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
}
558
559#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
560
561#define xfs_iowait(pb) pagebuf_iowait(pb)
562
563#define xfs_baread(target, rablkno, ralen) \
564 pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
565
566#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
567#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
568#define xfs_buf_free(bp) pagebuf_free(bp)
569
570
571/*
572 * Handling of buftargs.
573 */
574
575extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
576extern void xfs_free_buftarg(xfs_buftarg_t *, int);
577extern void xfs_wait_buftarg(xfs_buftarg_t *);
578extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
579extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
580extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
581
582#define xfs_getsize_buftarg(buftarg) \
583 block_size((buftarg)->pbr_bdev)
584#define xfs_readonly_buftarg(buftarg) \
585 bdev_read_only((buftarg)->pbr_bdev)
586#define xfs_binval(buftarg) \
587 xfs_flush_buftarg(buftarg, 1)
588#define XFS_bflush(buftarg) \
589 xfs_flush_buftarg(buftarg, 1)
590
591#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
new file mode 100644
index 000000000000..00c45849d41a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -0,0 +1,50 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_CRED_H__
33#define __XFS_CRED_H__
34
/*
 * Credentials.
 *
 * XFS carries no credential state of its own on Linux; cred_t is an
 * empty placeholder kept so IRIX-derived interfaces still compile.
 */
typedef struct cred {
	/* EMPTY */
} cred_t;

extern struct cred *sys_cred;
43
/* This is a hack: assumes sys_cred is the only cred_t in the system. */
45static __inline int capable_cred(cred_t *cr, int cid)
46{
47 return (cr == sys_cred) ? 1 : capable(cid);
48}
49
50#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
new file mode 100644
index 000000000000..f372a1a5e168
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -0,0 +1,205 @@
1/*
2 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_types.h"
35#include "xfs_dmapi.h"
36#include "xfs_log.h"
37#include "xfs_trans.h"
38#include "xfs_sb.h"
39#include "xfs_dir.h"
40#include "xfs_mount.h"
41#include "xfs_export.h"
42
43/*
44 * XFS encode and decodes the fileid portion of NFS filehandles
45 * itself instead of letting the generic NFS code do it. This
46 * allows filesystems with 64 bit inode numbers to be exported.
47 *
48 * Note that a side effect is that xfs_vget() won't be passed a
49 * zero inode/generation pair under normal circumstances. As
50 * however a malicious client could send us such data, the check
51 * remains in that code.
52 */
53
54
/*
 * Turn the fileid portion of an NFS filehandle into a dentry.
 * fileid_type 1 encodes inode + generation; type 2 appends the
 * parent's inode + generation; the XFS_FILEID_TYPE_64FLAG bit marks
 * 64-bit inode numbers (two words for each inode).  We decode into
 * xfs_fid2_t structures and hand off to the generic exportfs helper.
 */
STATIC struct dentry *
linvfs_decode_fh(
	struct super_block	*sb,
	__u32			*fh,
	int			fh_len,
	int			fileid_type,
	int (*acceptable)(
		void		*context,
		struct dentry	*de),
	void			*context)
{
	xfs_fid2_t		ifid;
	xfs_fid2_t		pfid;
	void			*parent = NULL;
	int			is64 = 0;
	__u32			*p = fh;

#if XFS_BIG_INUMS
	is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG);
	fileid_type &= ~XFS_FILEID_TYPE_64FLAG;
#endif

	/*
	 * Note that we only accept fileids which are long enough
	 * rather than allow the parent generation number to default
	 * to zero.  XFS considers zero a valid generation number not
	 * an invalid/wildcard value.  There's little point printk'ing
	 * a warning here as we don't have the client information
	 * which would make such a warning useful.
	 */
	if (fileid_type > 2 ||
	    fh_len < xfs_fileid_length((fileid_type == 2), is64))
		return NULL;

	p = xfs_fileid_decode_fid2(p, &ifid, is64);

	if (fileid_type == 2) {
		p = xfs_fileid_decode_fid2(p, &pfid, is64);
		parent = &pfid;
	}

	/* reuse fh to pass the decoded fid to the generic lookup */
	fh = (__u32 *)&ifid;
	return find_exported_dentry(sb, fh, parent, acceptable, context);
}
99
100
/*
 * Build the fileid portion of an NFS filehandle for a dentry.
 * Returns the fileid_type (1 or 2, OR'd with XFS_FILEID_TYPE_64FLAG
 * when 64-bit inode numbers may be present), or 255 when the supplied
 * buffer is too small for the chosen encoding.
 */
STATIC int
linvfs_encode_fh(
	struct dentry		*dentry,
	__u32			*fh,
	int			*max_len,
	int			connectable)
{
	struct inode		*inode = dentry->d_inode;
	int			type = 1;
	__u32			*p = fh;
	int			len;
	int			is64 = 0;
#if XFS_BIG_INUMS
	vfs_t			*vfs = LINVFS_GET_VFS(inode->i_sb);
	xfs_mount_t		*mp = XFS_VFSTOM(vfs);

	if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) {
		/* filesystem may contain 64bit inode numbers */
		is64 = XFS_FILEID_TYPE_64FLAG;
	}
#endif

	/* Directories don't need their parent encoded, they have ".." */
	if (S_ISDIR(inode->i_mode))
		connectable = 0;

	/*
	 * Only encode if there is enough space given.  In practice
	 * this means we can't export a filesystem with 64bit inodes
	 * over NFSv2 with the subtree_check export option; the other
	 * seven combinations work.  The real answer is "don't use v2".
	 */
	len = xfs_fileid_length(connectable, is64);
	if (*max_len < len)
		return 255;
	*max_len = len;

	p = xfs_fileid_encode_inode(p, inode, is64);
	if (connectable) {
		/* hold d_lock so d_parent cannot change under us */
		spin_lock(&dentry->d_lock);
		p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64);
		spin_unlock(&dentry->d_lock);
		type = 2;
	}
	BUG_ON((p - fh) != len);
	return type | is64;
}
148
/*
 * exportfs get_dentry: look up the inode named by a decoded fid via
 * VFS_VGET and wrap it in an anonymous dentry.  Returns -ESTALE when
 * the inode cannot be found, -ENOMEM when no dentry can be allocated
 * (in which case the inode reference is dropped here).
 */
STATIC struct dentry *
linvfs_get_dentry(
	struct super_block	*sb,
	void			*data)
{
	vnode_t			*vp;
	struct inode		*inode;
	struct dentry		*result;
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_VGET(vfsp, &vp, (fid_t *)data, error);
	if (error || vp == NULL)
		return ERR_PTR(-ESTALE) ;

	inode = LINVFS_GET_IP(vp);
	result = d_alloc_anon(inode);
	if (!result) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}
	return result;
}
172
/*
 * Find the parent of a directory dentry by performing a ".." lookup on
 * the underlying vnode and wrapping the result in an anonymous dentry.
 * A throwaway struct dentry is built on the stack purely to carry the
 * ".." name into VOP_LOOKUP.
 */
STATIC struct dentry *
linvfs_get_parent(
	struct dentry		*child)
{
	int			error;
	vnode_t			*vp, *cvp;
	struct dentry		*parent;
	struct dentry		dotdot;

	dotdot.d_name.name = "..";
	dotdot.d_name.len = 2;
	dotdot.d_inode = NULL;

	cvp = NULL;
	vp = LINVFS_GET_VP(child->d_inode);
	VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
	if (unlikely(error))
		return ERR_PTR(-error);	/* XFS errors are positive */

	parent = d_alloc_anon(LINVFS_GET_IP(cvp));
	if (unlikely(!parent)) {
		VN_RELE(cvp);	/* drop the reference VOP_LOOKUP took */
		return ERR_PTR(-ENOMEM);
	}
	return parent;
}
199
/* Export operations handed to the generic NFS export code. */
struct export_operations linvfs_export_ops = {
	.decode_fh		= linvfs_decode_fh,
	.encode_fh		= linvfs_encode_fh,
	.get_parent		= linvfs_get_parent,
	.get_dentry		= linvfs_get_dentry,
};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
new file mode 100644
index 000000000000..60b2abac1c18
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_EXPORT_H__
33#define __XFS_EXPORT_H__
34
35/*
36 * Common defines for code related to exporting XFS filesystems over NFS.
37 *
38 * The NFS fileid goes out on the wire as an array of
39 * 32bit unsigned ints in host order. There are 5 possible
40 * formats.
41 *
42 * (1) fileid_type=0x00
43 * (no fileid data; handled by the generic code)
44 *
45 * (2) fileid_type=0x01
46 * inode-num
47 * generation
48 *
49 * (3) fileid_type=0x02
50 * inode-num
51 * generation
52 * parent-inode-num
53 * parent-generation
54 *
55 * (4) fileid_type=0x81
56 * inode-num-lo32
57 * inode-num-hi32
58 * generation
59 *
60 * (5) fileid_type=0x82
61 * inode-num-lo32
62 * inode-num-hi32
63 * generation
64 * parent-inode-num-lo32
65 * parent-inode-num-hi32
66 * parent-generation
67 *
68 * Note, the NFS filehandle also includes an fsid portion which
69 * may have an inode number in it. That number is hardcoded to
70 * 32bits and there is no way for XFS to intercept it. In
71 * practice this means when exporting an XFS filesytem with 64bit
72 * inodes you should either export the mountpoint (rather than
73 * a subdirectory) or use the "fsid" export option.
74 */
75
76/* This flag goes on the wire. Don't play with it. */
77#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */
78
/*
 * Calculate the length in u32 units of the fileid data: one or two
 * words for the inode number (two with 64-bit inodes) plus one for the
 * generation, doubled when the parent is also encoded.
 */
static inline int
xfs_fileid_length(int hasparent, int is64)
{
	int words = is64 ? 3 : 2;

	if (hasparent)
		words *= 2;
	return words;
}
85
/*
 * Decode encoded inode information (either for the inode itself
 * or the parent) into an xfs_fid2_t structure.  Advances and
 * returns the new data pointer.
 */
static inline __u32 *
xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
{
	/* fid_len counts the bytes following the length field itself */
	fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
	fid->fid_pad = 0;
	fid->fid_ino = *p++;
#if XFS_BIG_INUMS
	if (is64)
		fid->fid_ino |= (((__u64)(*p++)) << 32);	/* high word follows low */
#endif
	fid->fid_gen = *p++;
	return p;
}
104
/*
 * Encode inode information (either for the inode itself or the
 * parent) into a fileid buffer.  Advances and returns the new
 * data pointer.  Word order matches xfs_fileid_decode_fid2():
 * low 32 bits of the inode number first, then (for 64-bit inodes)
 * the high 32 bits, then the generation.
 */
static inline __u32 *
xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64)
{
	*p++ = (__u32)inode->i_ino;
#if XFS_BIG_INUMS
	if (is64)
		*p++ = (__u32)(inode->i_ino >> 32);
#endif
	*p++ = inode->i_generation;
	return p;
}
121
122#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
new file mode 100644
index 000000000000..9f057a4a5b06
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -0,0 +1,573 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_inum.h"
35#include "xfs_log.h"
36#include "xfs_sb.h"
37#include "xfs_dir.h"
38#include "xfs_dir2.h"
39#include "xfs_trans.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42#include "xfs_bmap_btree.h"
43#include "xfs_alloc_btree.h"
44#include "xfs_ialloc_btree.h"
45#include "xfs_alloc.h"
46#include "xfs_btree.h"
47#include "xfs_attr_sf.h"
48#include "xfs_dir_sf.h"
49#include "xfs_dir2_sf.h"
50#include "xfs_dinode.h"
51#include "xfs_inode.h"
52#include "xfs_error.h"
53#include "xfs_rw.h"
54#include "xfs_ioctl32.h"
55
56#include <linux/dcache.h>
57#include <linux/smp_lock.h>
58
59static struct vm_operations_struct linvfs_file_vm_ops;
60
61
/*
 * Common aio read path: package the user buffer into a single-segment
 * iovec and hand it to the vnode layer via VOP_READ.  O_DIRECT opens
 * are flagged with IO_ISDIRECT on every call.
 */
STATIC inline ssize_t
__linvfs_read(
	struct kiocb		*iocb,
	char			__user *buf,
	int			ioflags,
	size_t			count,
	loff_t			pos)
{
	struct iovec		iov = {buf, count};
	struct file		*file = iocb->ki_filp;
	vnode_t			*vp = LINVFS_GET_VP(file->f_dentry->d_inode);
	ssize_t			rval;

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
	return rval;
}
82
83
/* aio_read entry point: flag the I/O as AIO for the vnode layer. */
STATIC ssize_t
linvfs_aio_read(
	struct kiocb		*iocb,
	char			__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
}

/* "Invisible" variant (IO_INVIS): does not update timestamps; used by
 * the DMAPI/handle interfaces. */
STATIC ssize_t
linvfs_aio_read_invis(
	struct kiocb		*iocb,
	char			__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
103
104
/*
 * Common aio write path: mirror of __linvfs_read(), dispatching a
 * single-segment iovec through VOP_WRITE with IO_ISDIRECT added for
 * O_DIRECT opens.
 */
STATIC inline ssize_t
__linvfs_write(
	struct kiocb		*iocb,
	const char		__user *buf,
	int			ioflags,
	size_t			count,
	loff_t			pos)
{
	struct iovec	iov = {(void __user *)buf, count};
	struct file	*file = iocb->ki_filp;
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	ssize_t		rval;

	BUG_ON(iocb->ki_pos != pos);
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;

	VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
	return rval;
}
126
127
/* aio_write entry point: flag the I/O as AIO for the vnode layer. */
STATIC ssize_t
linvfs_aio_write(
	struct kiocb		*iocb,
	const char		__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_aio_write_invis(
	struct kiocb		*iocb,
	const char		__user *buf,
	size_t			count,
	loff_t			pos)
{
	return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
147
148
/*
 * Vectored read: build a synchronous kiocb around the file position,
 * dispatch through VOP_READ, and propagate the advanced position back
 * to *ppos.
 */
STATIC inline ssize_t
__linvfs_readv(
	struct file		*file,
	const struct iovec 	*iov,
	int			ioflags,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	struct kiocb	kiocb;
	ssize_t		rval;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);

	*ppos = kiocb.ki_pos;
	return rval;
}
172
/* readv entry point: plain vectored read, no extra I/O flags. */
STATIC ssize_t
linvfs_readv(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_readv(file, iov, 0, nr_segs, ppos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_readv_invis(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
}
192
193
/*
 * Vectored write: mirror of __linvfs_readv(), dispatching through
 * VOP_WRITE and propagating the advanced position back to *ppos.
 */
STATIC inline ssize_t
__linvfs_writev(
	struct file		*file,
	const struct iovec 	*iov,
	int			ioflags,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	struct kiocb	kiocb;
	ssize_t		rval;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;

	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);

	*ppos = kiocb.ki_pos;
	return rval;
}
217
218
/* writev entry point: plain vectored write, no extra I/O flags. */
STATIC ssize_t
linvfs_writev(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_writev(file, iov, 0, nr_segs, ppos);
}

/* "Invisible" variant (IO_INVIS): no timestamp updates. */
STATIC ssize_t
linvfs_writev_invis(
	struct file		*file,
	const struct iovec 	*iov,
	unsigned long		nr_segs,
	loff_t			*ppos)
{
	return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
}
238
/* sendfile entry point: hand straight off to the vnode layer. */
STATIC ssize_t
linvfs_sendfile(
	struct file		*filp,
	loff_t			*ppos,
	size_t			count,
	read_actor_t		actor,
	void			*target)
{
	vnode_t			*vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
	ssize_t			rval;

	VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
	return rval;
}
253
254
/*
 * File open: reject non-O_LARGEFILE opens of files already past the
 * 32-bit size limit, then notify the vnode layer.  XFS errors are
 * positive, hence the negation on return.
 */
STATIC int
linvfs_open(
	struct inode	*inode,
	struct file	*filp)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;

	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;

	ASSERT(vp);
	VOP_OPEN(vp, NULL, error);
	return -error;
}
270
271
/*
 * Last close of a file: notify the vnode layer (if the vnode still
 * exists) so it can flush/truncate speculative allocations.
 */
STATIC int
linvfs_release(
	struct inode	*inode,
	struct file	*filp)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error = 0;

	if (vp)
		VOP_RELEASE(vp, error);
	return -error;
}
284
285
/*
 * fsync/fdatasync: wait for I/O completion (FSYNC_WAIT); datasync adds
 * FSYNC_DATA so non-essential metadata updates can be skipped.  The
 * (0, -1) offset pair means "the whole file".
 */
STATIC int
linvfs_fsync(
	struct file	*filp,
	struct dentry	*dentry,
	int		datasync)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;
	int		flags = FSYNC_WAIT;

	if (datasync)
		flags |= FSYNC_DATA;

	ASSERT(vp);
	VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
	return -error;
}
304
/*
 * linvfs_readdir maps to VOP_READDIR().
 *
 * We read batches of directory entries from the vnode layer into a
 * locally-allocated buffer and replay them through the VFS filldir
 * callback.  Offsets handed to the VFS are masked to 31 bits; the
 * sentinel f_pos value 0x7fffffff marks end-of-directory and is mapped
 * to the vnode layer's 0xffffffff EOF cookie on re-entry.
 */

/* step to the next variable-length dirent in the batch buffer */
#define nextdp(dp)		((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))

STATIC int
linvfs_readdir(
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
	int		error = 0;
	vnode_t		*vp;
	uio_t		uio;
	iovec_t		iov;
	int		eof = 0;
	caddr_t		read_buf;
	int		namelen, size = 0;
	size_t		rlen = PAGE_CACHE_SIZE;
	xfs_off_t	start_offset, curr_offset;
	xfs_dirent_t	*dbp = NULL;

	vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
	ASSERT(vp);

	/* Try fairly hard to get memory: halve the request on failure,
	 * down to a 1K floor */
	do {
		if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
			break;
		rlen >>= 1;
	} while (rlen >= 1024);

	if (read_buf == NULL)
		return -ENOMEM;

	uio.uio_iov = &iov;
	uio.uio_segflg = UIO_SYSSPACE;
	curr_offset = filp->f_pos;
	if (filp->f_pos != 0x7fffffff)
		uio.uio_offset = filp->f_pos;
	else
		uio.uio_offset = 0xffffffff;	/* EOF sentinel for VOP_READDIR */

	while (!eof) {
		uio.uio_resid = iov.iov_len = rlen;
		iov.iov_base = read_buf;
		uio.uio_iovcnt = 1;

		start_offset = uio.uio_offset;

		VOP_READDIR(vp, &uio, NULL, &eof, error);
		/* no progress (or error) terminates the outer loop */
		if ((uio.uio_offset == start_offset) || error) {
			size = 0;
			break;
		}

		size = rlen - uio.uio_resid;	/* bytes of dirents returned */
		dbp = (xfs_dirent_t *)read_buf;
		while (size > 0) {
			namelen = strlen(dbp->d_name);

			if (filldir(dirent, dbp->d_name, namelen,
					(loff_t) curr_offset & 0x7fffffff,
					(ino_t) dbp->d_ino,
					DT_UNKNOWN)) {
				/* the VFS buffer is full; stop here */
				goto done;
			}
			size -= dbp->d_reclen;
			curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
			dbp = nextdp(dbp);
		}
	}
done:
	if (!error) {
		if (size == 0)
			filp->f_pos = uio.uio_offset & 0x7fffffff;
		else if (dbp)
			filp->f_pos = curr_offset;
	}

	kfree(read_buf);
	return -error;
}
390
391
/*
 * mmap of a file: on DMAPI-managed filesystems a mmap event is sent
 * first (and can veto the mapping); otherwise install our vm_ops and
 * bump the access time.
 */
STATIC int
linvfs_file_mmap(
	struct file	*filp,
	struct vm_area_struct *vma)
{
	struct inode	*ip = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(ip);
	vattr_t		va = { .va_mask = XFS_AT_UPDATIME };
	int		error;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);

		error = -XFS_SEND_MMAP(mp, vma, 0);
		if (error)
			return error;
	}

	vma->vm_ops = &linvfs_file_vm_ops;

	/* NOTE(review): a VOP_SETATTR failure here is deliberately
	 * ignored — the mmap itself succeeds regardless. */
	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
	if (!error)
		vn_revalidate(vp);	/* update Linux inode flags */
	return 0;
}
417
418
/*
 * ioctl dispatch: everything is handled by VOP_IOCTL; the vnode is
 * marked modified afterwards.
 */
STATIC long
linvfs_ioctl(
	struct file	*filp,
	unsigned int	cmd,
	unsigned long	arg)
{
	int		error;
	struct inode	*inode = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
	VMODIFY(vp);

	/* NOTE: some of the ioctl's return positive #'s as a
	 * byte count indicating success, such as
	 * readlink_by_handle.  So we don't "sign flip"
	 * like most other routines.  This means true
	 * errors need to be returned as a negative value.
	 */
	return error;
}
440
/*
 * ioctl dispatch for the "invisible" file operations (IO_INVIS: no
 * timestamp updates); otherwise identical to linvfs_ioctl().
 */
STATIC long
linvfs_ioctl_invis(
	struct file	*filp,
	unsigned int	cmd,
	unsigned long	arg)
{
	int		error;
	struct inode	*inode = filp->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	ASSERT(vp);
	VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
	VMODIFY(vp);

	/* NOTE: some of the ioctl's return positive #'s as a
	 * byte count indicating success, such as
	 * readlink_by_handle.  So we don't "sign flip"
	 * like most other routines.  This means true
	 * errors need to be returned as a negative value.
	 */
	return error;
}
463
464#ifdef HAVE_VMOP_MPROTECT
/*
 * mprotect hook: on DMAPI filesystems, notify the DMAPI layer when a
 * shared mapping is being upgraded from read-only to writable.
 */
STATIC int
linvfs_mprotect(
	struct vm_area_struct *vma,
	unsigned int	newflags)
{
	vnode_t		*vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
	int		error = 0;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		if ((vma->vm_flags & VM_MAYSHARE) &&
		    (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) {
			xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);

			error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
		}
	}
	return error;
}
483#endif /* HAVE_VMOP_MPROTECT */
484
485#ifdef HAVE_FOP_OPEN_EXEC
486/* If the user is attempting to execute a file that is offline then
487 * we have to trigger a DMAPI READ event before the file is marked as busy
488 * otherwise the invisible I/O will not be able to write to the file to bring
489 * it back online.
490 */
/*
 * exec-time open hook (DMAPI filesystems only): trigger a DMAPI READ
 * event before the file is marked busy, so an offline file can be
 * migrated back online before execution begins.
 */
STATIC int
linvfs_open_exec(
	struct inode	*inode)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
	int		error = 0;
	bhv_desc_t	*bdp;
	xfs_inode_t	*ip;

	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
		if (!bdp) {
			error = -EINVAL;
			goto open_exec_out;
		}
		ip = XFS_BHVTOI(bdp);
		if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
			error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
					       0, 0, 0, NULL);
		}
	}
open_exec_out:
	return error;
}
516#endif /* HAVE_FOP_OPEN_EXEC */
517
/* Regular-file operations. */
struct file_operations linvfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.readv		= linvfs_readv,
	.writev		= linvfs_writev,
	.aio_read	= linvfs_aio_read,
	.aio_write	= linvfs_aio_write,
	.sendfile	= linvfs_sendfile,
	.unlocked_ioctl	= linvfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_compat_ioctl,
#endif
	.mmap		= linvfs_file_mmap,
	.open		= linvfs_open,
	.release	= linvfs_release,
	.fsync		= linvfs_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
	.open_exec	= linvfs_open_exec,
#endif
};

/* File operations for files opened via the XFS handle interface:
 * identical to the regular set but using the IO_INVIS variants so
 * access does not perturb timestamps (DMAPI "invisible" I/O). */
struct file_operations linvfs_invis_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.readv		= linvfs_readv_invis,
	.writev		= linvfs_writev_invis,
	.aio_read	= linvfs_aio_read_invis,
	.aio_write	= linvfs_aio_write_invis,
	.sendfile	= linvfs_sendfile,
	.unlocked_ioctl	= linvfs_ioctl_invis,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_compat_invis_ioctl,
#endif
	.mmap		= linvfs_file_mmap,
	.open		= linvfs_open,
	.release	= linvfs_release,
	.fsync		= linvfs_fsync,
};


/* Directory operations. */
struct file_operations linvfs_dir_operations = {
	.read		= generic_read_dir,
	.readdir	= linvfs_readdir,
	.unlocked_ioctl	= linvfs_ioctl,
	.fsync		= linvfs_fsync,
};

/* VM operations installed by linvfs_file_mmap(). */
static struct vm_operations_struct linvfs_file_vm_ops = {
	.nopage		= filemap_nopage,
	.populate	= filemap_populate,
#ifdef HAVE_VMOP_MPROTECT
	.mprotect	= linvfs_mprotect,
#endif
};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
new file mode 100644
index 000000000000..05ebd30ec96f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -0,0 +1,124 @@
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35/*
36 * Stub for no-op vnode operations that return error status.
37 */
38int
39fs_noerr(void)
40{
41 return 0;
42}
43
44/*
45 * Operation unsupported under this file system.
46 */
47int
48fs_nosys(void)
49{
50 return ENOSYS;
51}
52
53/*
54 * Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
55 */
56/* ARGSUSED */
57void
58fs_noval(void)
59{
60}
61
62/*
63 * vnode pcache layer for vnode_tosspages.
64 * 'last' parameter unused but left in for IRIX compatibility
65 */
66void
67fs_tosspages(
68 bhv_desc_t *bdp,
69 xfs_off_t first,
70 xfs_off_t last,
71 int fiopt)
72{
73 vnode_t *vp = BHV_TO_VNODE(bdp);
74 struct inode *ip = LINVFS_GET_IP(vp);
75
76 if (VN_CACHED(vp))
77 truncate_inode_pages(ip->i_mapping, first);
78}
79
80
81/*
82 * vnode pcache layer for vnode_flushinval_pages.
83 * 'last' parameter unused but left in for IRIX compatibility
84 */
85void
86fs_flushinval_pages(
87 bhv_desc_t *bdp,
88 xfs_off_t first,
89 xfs_off_t last,
90 int fiopt)
91{
92 vnode_t *vp = BHV_TO_VNODE(bdp);
93 struct inode *ip = LINVFS_GET_IP(vp);
94
95 if (VN_CACHED(vp)) {
96 filemap_fdatawrite(ip->i_mapping);
97 filemap_fdatawait(ip->i_mapping);
98
99 truncate_inode_pages(ip->i_mapping, first);
100 }
101}
102
103/*
104 * vnode pcache layer for vnode_flush_pages.
105 * 'last' parameter unused but left in for IRIX compatibility
106 */
107int
108fs_flush_pages(
109 bhv_desc_t *bdp,
110 xfs_off_t first,
111 xfs_off_t last,
112 uint64_t flags,
113 int fiopt)
114{
115 vnode_t *vp = BHV_TO_VNODE(bdp);
116 struct inode *ip = LINVFS_GET_IP(vp);
117
118 if (VN_CACHED(vp)) {
119 filemap_fdatawrite(ip->i_mapping);
120 filemap_fdatawait(ip->i_mapping);
121 }
122
123 return 0;
124}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
new file mode 100644
index 000000000000..2db9ddbd4567
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.h
@@ -0,0 +1,48 @@
1/*
2 * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_SUBR_H__
33#define __XFS_SUBR_H__
34
35/*
36 * Utilities shared among file system implementations.
37 */
38
39struct cred;
40
41extern int fs_noerr(void);
42extern int fs_nosys(void);
43extern void fs_noval(void);
44extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
45extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
46extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
47
48#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
new file mode 100644
index 000000000000..a6da5b4fd240
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * This file contains globals needed by XFS that were normally defined
35 * somewhere else in IRIX.
36 */
37
38#include "xfs.h"
39#include "xfs_cred.h"
40#include "xfs_sysctl.h"
41
42/*
43 * System memory size - used to scale certain data structures in XFS.
44 */
45unsigned long xfs_physmem;
46
47/*
48 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
49 * other XFS code uses these values. Times are measured in centisecs (i.e.
50 * 100ths of a second).
51 */
52xfs_param_t xfs_params = {
53 /* MIN DFLT MAX */
54 .restrict_chown = { 0, 1, 1 },
55 .sgid_inherit = { 0, 0, 1 },
56 .symlink_mode = { 0, 0, 1 },
57 .panic_mask = { 0, 0, 127 },
58 .error_level = { 0, 3, 11 },
59 .syncd_timer = { 1*100, 30*100, 7200*100},
60 .stats_clear = { 0, 0, 1 },
61 .inherit_sync = { 0, 1, 1 },
62 .inherit_nodump = { 0, 1, 1 },
63 .inherit_noatim = { 0, 1, 1 },
64 .xfs_buf_timer = { 100/2, 1*100, 30*100 },
65 .xfs_buf_age = { 1*100, 15*100, 7200*100},
66 .inherit_nosym = { 0, 0, 1 },
67 .rotorstep = { 1, 1, 255 },
68};
69
70/*
71 * Global system credential structure.
72 */
73cred_t sys_cred_val, *sys_cred = &sys_cred_val;
74
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
new file mode 100644
index 000000000000..e81e2f38a853
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_GLOBALS_H__
33#define __XFS_GLOBALS_H__
34
35/*
36 * This file declares globals needed by XFS that were normally defined
37 * somewhere else in IRIX.
38 */
39
40extern uint64_t xfs_panic_mask; /* set to cause more panics */
41extern unsigned long xfs_physmem;
42extern struct cred *sys_cred;
43
44#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
new file mode 100644
index 000000000000..69809eef8a54
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -0,0 +1,1336 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35#include "xfs_fs.h"
36#include "xfs_inum.h"
37#include "xfs_log.h"
38#include "xfs_trans.h"
39#include "xfs_sb.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_mount.h"
45#include "xfs_alloc_btree.h"
46#include "xfs_bmap_btree.h"
47#include "xfs_ialloc_btree.h"
48#include "xfs_btree.h"
49#include "xfs_ialloc.h"
50#include "xfs_attr_sf.h"
51#include "xfs_dir_sf.h"
52#include "xfs_dir2_sf.h"
53#include "xfs_dinode.h"
54#include "xfs_inode.h"
55#include "xfs_bmap.h"
56#include "xfs_bit.h"
57#include "xfs_rtalloc.h"
58#include "xfs_error.h"
59#include "xfs_itable.h"
60#include "xfs_rw.h"
61#include "xfs_acl.h"
62#include "xfs_cap.h"
63#include "xfs_mac.h"
64#include "xfs_attr.h"
65#include "xfs_buf_item.h"
66#include "xfs_utils.h"
67#include "xfs_dfrag.h"
68#include "xfs_fsops.h"
69
70#include <linux/dcache.h>
71#include <linux/mount.h>
72#include <linux/namei.h>
73#include <linux/pagemap.h>
74
75/*
76 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
77 * a file or fs handle.
78 *
79 * XFS_IOC_PATH_TO_FSHANDLE
80 * returns fs handle for a mount point or path within that mount point
81 * XFS_IOC_FD_TO_HANDLE
82 * returns full handle for a FD opened in user space
83 * XFS_IOC_PATH_TO_HANDLE
84 * returns full handle for a path
85 */
86STATIC int
87xfs_find_handle(
88 unsigned int cmd,
89 void __user *arg)
90{
91 int hsize;
92 xfs_handle_t handle;
93 xfs_fsop_handlereq_t hreq;
94 struct inode *inode;
95 struct vnode *vp;
96
97 if (copy_from_user(&hreq, arg, sizeof(hreq)))
98 return -XFS_ERROR(EFAULT);
99
100 memset((char *)&handle, 0, sizeof(handle));
101
102 switch (cmd) {
103 case XFS_IOC_PATH_TO_FSHANDLE:
104 case XFS_IOC_PATH_TO_HANDLE: {
105 struct nameidata nd;
106 int error;
107
108 error = user_path_walk_link((const char __user *)hreq.path, &nd);
109 if (error)
110 return error;
111
112 ASSERT(nd.dentry);
113 ASSERT(nd.dentry->d_inode);
114 inode = igrab(nd.dentry->d_inode);
115 path_release(&nd);
116 break;
117 }
118
119 case XFS_IOC_FD_TO_HANDLE: {
120 struct file *file;
121
122 file = fget(hreq.fd);
123 if (!file)
124 return -EBADF;
125
126 ASSERT(file->f_dentry);
127 ASSERT(file->f_dentry->d_inode);
128 inode = igrab(file->f_dentry->d_inode);
129 fput(file);
130 break;
131 }
132
133 default:
134 ASSERT(0);
135 return -XFS_ERROR(EINVAL);
136 }
137
138 if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
139 /* we're not in XFS anymore, Toto */
140 iput(inode);
141 return -XFS_ERROR(EINVAL);
142 }
143
144 /* we need the vnode */
145 vp = LINVFS_GET_VP(inode);
146 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
147 iput(inode);
148 return -XFS_ERROR(EBADF);
149 }
150
151 /* now we can grab the fsid */
152 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
153 hsize = sizeof(xfs_fsid_t);
154
155 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
156 xfs_inode_t *ip;
157 bhv_desc_t *bhv;
158 int lock_mode;
159
160 /* need to get access to the xfs_inode to read the generation */
161 bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
162 ASSERT(bhv);
163 ip = XFS_BHVTOI(bhv);
164 ASSERT(ip);
165 lock_mode = xfs_ilock_map_shared(ip);
166
167 /* fill in fid section of handle from inode */
168 handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
169 sizeof(handle.ha_fid.xfs_fid_len);
170 handle.ha_fid.xfs_fid_pad = 0;
171 handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
172 handle.ha_fid.xfs_fid_ino = ip->i_ino;
173
174 xfs_iunlock_map_shared(ip, lock_mode);
175
176 hsize = XFS_HSIZE(handle);
177 }
178
179 /* now copy our handle into the user buffer & write out the size */
180 if (copy_to_user(hreq.ohandle, &handle, hsize) ||
181 copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
182 iput(inode);
183 return -XFS_ERROR(EFAULT);
184 }
185
186 iput(inode);
187 return 0;
188}
189
190
191/*
192 * Convert userspace handle data into vnode (and inode).
193 * We [ab]use the fact that all the fsop_handlereq ioctl calls
194 * have a data structure argument whose first component is always
195 * a xfs_fsop_handlereq_t, so we can cast to and from this type.
196 * This allows us to optimise the copy_from_user calls and gives
197 * a handy, shared routine.
198 *
199 * If no error, caller must always VN_RELE the returned vp.
200 */
201STATIC int
202xfs_vget_fsop_handlereq(
203 xfs_mount_t *mp,
204 struct inode *parinode, /* parent inode pointer */
205 xfs_fsop_handlereq_t *hreq,
206 vnode_t **vp,
207 struct inode **inode)
208{
209 void __user *hanp;
210 size_t hlen;
211 xfs_fid_t *xfid;
212 xfs_handle_t *handlep;
213 xfs_handle_t handle;
214 xfs_inode_t *ip;
215 struct inode *inodep;
216 vnode_t *vpp;
217 xfs_ino_t ino;
218 __u32 igen;
219 int error;
220
221 /*
222 * Only allow handle opens under a directory.
223 */
224 if (!S_ISDIR(parinode->i_mode))
225 return XFS_ERROR(ENOTDIR);
226
227 hanp = hreq->ihandle;
228 hlen = hreq->ihandlen;
229 handlep = &handle;
230
231 if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
232 return XFS_ERROR(EINVAL);
233 if (copy_from_user(handlep, hanp, hlen))
234 return XFS_ERROR(EFAULT);
235 if (hlen < sizeof(*handlep))
236 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
237 if (hlen > sizeof(handlep->ha_fsid)) {
238 if (handlep->ha_fid.xfs_fid_len !=
239 (hlen - sizeof(handlep->ha_fsid)
240 - sizeof(handlep->ha_fid.xfs_fid_len))
241 || handlep->ha_fid.xfs_fid_pad)
242 return XFS_ERROR(EINVAL);
243 }
244
245 /*
246 * Crack the handle, obtain the inode # & generation #
247 */
248 xfid = (struct xfs_fid *)&handlep->ha_fid;
249 if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
250 ino = xfid->xfs_fid_ino;
251 igen = xfid->xfs_fid_gen;
252 } else {
253 return XFS_ERROR(EINVAL);
254 }
255
256 /*
257 * Get the XFS inode, building a vnode to go with it.
258 */
259 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
260 if (error)
261 return error;
262 if (ip == NULL)
263 return XFS_ERROR(EIO);
264 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
265 xfs_iput_new(ip, XFS_ILOCK_SHARED);
266 return XFS_ERROR(ENOENT);
267 }
268
269 vpp = XFS_ITOV(ip);
270 inodep = LINVFS_GET_IP(vpp);
271 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272
273 *vp = vpp;
274 *inode = inodep;
275 return 0;
276}
277
278STATIC int
279xfs_open_by_handle(
280 xfs_mount_t *mp,
281 void __user *arg,
282 struct file *parfilp,
283 struct inode *parinode)
284{
285 int error;
286 int new_fd;
287 int permflag;
288 struct file *filp;
289 struct inode *inode;
290 struct dentry *dentry;
291 vnode_t *vp;
292 xfs_fsop_handlereq_t hreq;
293
294 if (!capable(CAP_SYS_ADMIN))
295 return -XFS_ERROR(EPERM);
296 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
297 return -XFS_ERROR(EFAULT);
298
299 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
300 if (error)
301 return -error;
302
303 /* Restrict xfs_open_by_handle to directories & regular files. */
304 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
305 iput(inode);
306 return -XFS_ERROR(EINVAL);
307 }
308
309#if BITS_PER_LONG != 32
310 hreq.oflags |= O_LARGEFILE;
311#endif
312 /* Put open permission in namei format. */
313 permflag = hreq.oflags;
314 if ((permflag+1) & O_ACCMODE)
315 permflag++;
316 if (permflag & O_TRUNC)
317 permflag |= 2;
318
319 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
320 (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
321 iput(inode);
322 return -XFS_ERROR(EPERM);
323 }
324
325 if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
326 iput(inode);
327 return -XFS_ERROR(EACCES);
328 }
329
330 /* Can't write directories. */
331 if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
332 iput(inode);
333 return -XFS_ERROR(EISDIR);
334 }
335
336 if ((new_fd = get_unused_fd()) < 0) {
337 iput(inode);
338 return new_fd;
339 }
340
341 dentry = d_alloc_anon(inode);
342 if (dentry == NULL) {
343 iput(inode);
344 put_unused_fd(new_fd);
345 return -XFS_ERROR(ENOMEM);
346 }
347
348 /* Ensure umount returns EBUSY on umounts while this file is open. */
349 mntget(parfilp->f_vfsmnt);
350
351 /* Create file pointer. */
352 filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
353 if (IS_ERR(filp)) {
354 put_unused_fd(new_fd);
355 return -XFS_ERROR(-PTR_ERR(filp));
356 }
357 if (inode->i_mode & S_IFREG)
358 filp->f_op = &linvfs_invis_file_operations;
359
360 fd_install(new_fd, filp);
361 return new_fd;
362}
363
364STATIC int
365xfs_readlink_by_handle(
366 xfs_mount_t *mp,
367 void __user *arg,
368 struct file *parfilp,
369 struct inode *parinode)
370{
371 int error;
372 struct iovec aiov;
373 struct uio auio;
374 struct inode *inode;
375 xfs_fsop_handlereq_t hreq;
376 vnode_t *vp;
377 __u32 olen;
378
379 if (!capable(CAP_SYS_ADMIN))
380 return -XFS_ERROR(EPERM);
381 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
382 return -XFS_ERROR(EFAULT);
383
384 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
385 if (error)
386 return -error;
387
388 /* Restrict this handle operation to symlinks only. */
389 if (vp->v_type != VLNK) {
390 VN_RELE(vp);
391 return -XFS_ERROR(EINVAL);
392 }
393
394 if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
395 VN_RELE(vp);
396 return -XFS_ERROR(EFAULT);
397 }
398 aiov.iov_len = olen;
399 aiov.iov_base = hreq.ohandle;
400
401 auio.uio_iov = &aiov;
402 auio.uio_iovcnt = 1;
403 auio.uio_offset = 0;
404 auio.uio_segflg = UIO_USERSPACE;
405 auio.uio_resid = olen;
406
407 VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
408
409 VN_RELE(vp);
410 return (olen - auio.uio_resid);
411}
412
413STATIC int
414xfs_fssetdm_by_handle(
415 xfs_mount_t *mp,
416 void __user *arg,
417 struct file *parfilp,
418 struct inode *parinode)
419{
420 int error;
421 struct fsdmidata fsd;
422 xfs_fsop_setdm_handlereq_t dmhreq;
423 struct inode *inode;
424 bhv_desc_t *bdp;
425 vnode_t *vp;
426
427 if (!capable(CAP_MKNOD))
428 return -XFS_ERROR(EPERM);
429 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
430 return -XFS_ERROR(EFAULT);
431
432 error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode);
433 if (error)
434 return -error;
435
436 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
437 VN_RELE(vp);
438 return -XFS_ERROR(EPERM);
439 }
440
441 if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
442 VN_RELE(vp);
443 return -XFS_ERROR(EFAULT);
444 }
445
446 bdp = bhv_base_unlocked(VN_BHV_HEAD(vp));
447 error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL);
448
449 VN_RELE(vp);
450 if (error)
451 return -error;
452 return 0;
453}
454
455STATIC int
456xfs_attrlist_by_handle(
457 xfs_mount_t *mp,
458 void __user *arg,
459 struct file *parfilp,
460 struct inode *parinode)
461{
462 int error;
463 attrlist_cursor_kern_t *cursor;
464 xfs_fsop_attrlist_handlereq_t al_hreq;
465 struct inode *inode;
466 vnode_t *vp;
467 char *kbuf;
468
469 if (!capable(CAP_SYS_ADMIN))
470 return -XFS_ERROR(EPERM);
471 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
472 return -XFS_ERROR(EFAULT);
473 if (al_hreq.buflen > XATTR_LIST_MAX)
474 return -XFS_ERROR(EINVAL);
475
476 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq,
477 &vp, &inode);
478 if (error)
479 goto out;
480
481 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
482 if (!kbuf)
483 goto out_vn_rele;
484
485 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
486 VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags,
487 cursor, NULL, error);
488 if (error)
489 goto out_kfree;
490
491 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
492 error = -EFAULT;
493
494 out_kfree:
495 kfree(kbuf);
496 out_vn_rele:
497 VN_RELE(vp);
498 out:
499 return -error;
500}
501
502STATIC int
503xfs_attrmulti_attr_get(
504 struct vnode *vp,
505 char *name,
506 char __user *ubuf,
507 __uint32_t *len,
508 __uint32_t flags)
509{
510 char *kbuf;
511 int error = EFAULT;
512
513 if (*len > XATTR_SIZE_MAX)
514 return EINVAL;
515 kbuf = kmalloc(*len, GFP_KERNEL);
516 if (!kbuf)
517 return ENOMEM;
518
519 VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error);
520 if (error)
521 goto out_kfree;
522
523 if (copy_to_user(ubuf, kbuf, *len))
524 error = EFAULT;
525
526 out_kfree:
527 kfree(kbuf);
528 return error;
529}
530
531STATIC int
532xfs_attrmulti_attr_set(
533 struct vnode *vp,
534 char *name,
535 const char __user *ubuf,
536 __uint32_t len,
537 __uint32_t flags)
538{
539 char *kbuf;
540 int error = EFAULT;
541
542 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
543 return EPERM;
544 if (len > XATTR_SIZE_MAX)
545 return EINVAL;
546
547 kbuf = kmalloc(len, GFP_KERNEL);
548 if (!kbuf)
549 return ENOMEM;
550
551 if (copy_from_user(kbuf, ubuf, len))
552 goto out_kfree;
553
554 VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error);
555
556 out_kfree:
557 kfree(kbuf);
558 return error;
559}
560
561STATIC int
562xfs_attrmulti_attr_remove(
563 struct vnode *vp,
564 char *name,
565 __uint32_t flags)
566{
567 int error;
568
569 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
570 return EPERM;
571
572 VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
573 return error;
574}
575
576STATIC int
577xfs_attrmulti_by_handle(
578 xfs_mount_t *mp,
579 void __user *arg,
580 struct file *parfilp,
581 struct inode *parinode)
582{
583 int error;
584 xfs_attr_multiop_t *ops;
585 xfs_fsop_attrmulti_handlereq_t am_hreq;
586 struct inode *inode;
587 vnode_t *vp;
588 unsigned int i, size;
589 char *attr_name;
590
591 if (!capable(CAP_SYS_ADMIN))
592 return -XFS_ERROR(EPERM);
593 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
594 return -XFS_ERROR(EFAULT);
595
596 error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode);
597 if (error)
598 goto out;
599
600 error = E2BIG;
601 size = am_hreq.opcount * sizeof(attr_multiop_t);
602 if (!size || size > 16 * PAGE_SIZE)
603 goto out_vn_rele;
604
605 error = ENOMEM;
606 ops = kmalloc(size, GFP_KERNEL);
607 if (!ops)
608 goto out_vn_rele;
609
610 error = EFAULT;
611 if (copy_from_user(ops, am_hreq.ops, size))
612 goto out_kfree_ops;
613
614 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
615 if (!attr_name)
616 goto out_kfree_ops;
617
618
619 error = 0;
620 for (i = 0; i < am_hreq.opcount; i++) {
621 ops[i].am_error = strncpy_from_user(attr_name,
622 ops[i].am_attrname, MAXNAMELEN);
623 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
624 error = -ERANGE;
625 if (ops[i].am_error < 0)
626 break;
627
628 switch (ops[i].am_opcode) {
629 case ATTR_OP_GET:
630 ops[i].am_error = xfs_attrmulti_attr_get(vp,
631 attr_name, ops[i].am_attrvalue,
632 &ops[i].am_length, ops[i].am_flags);
633 break;
634 case ATTR_OP_SET:
635 ops[i].am_error = xfs_attrmulti_attr_set(vp,
636 attr_name, ops[i].am_attrvalue,
637 ops[i].am_length, ops[i].am_flags);
638 break;
639 case ATTR_OP_REMOVE:
640 ops[i].am_error = xfs_attrmulti_attr_remove(vp,
641 attr_name, ops[i].am_flags);
642 break;
643 default:
644 ops[i].am_error = EINVAL;
645 }
646 }
647
648 if (copy_to_user(am_hreq.ops, ops, size))
649 error = XFS_ERROR(EFAULT);
650
651 kfree(attr_name);
652 out_kfree_ops:
653 kfree(ops);
654 out_vn_rele:
655 VN_RELE(vp);
656 out:
657 return -error;
658}
659
660/* prototypes for a few of the stack-hungry cases that have
661 * their own functions. Functions are defined after their use
662 * so gcc doesn't get fancy and inline them with -03 */
663
664STATIC int
665xfs_ioc_space(
666 bhv_desc_t *bdp,
667 vnode_t *vp,
668 struct file *filp,
669 int flags,
670 unsigned int cmd,
671 void __user *arg);
672
673STATIC int
674xfs_ioc_bulkstat(
675 xfs_mount_t *mp,
676 unsigned int cmd,
677 void __user *arg);
678
679STATIC int
680xfs_ioc_fsgeometry_v1(
681 xfs_mount_t *mp,
682 void __user *arg);
683
684STATIC int
685xfs_ioc_fsgeometry(
686 xfs_mount_t *mp,
687 void __user *arg);
688
689STATIC int
690xfs_ioc_xattr(
691 vnode_t *vp,
692 xfs_inode_t *ip,
693 struct file *filp,
694 unsigned int cmd,
695 void __user *arg);
696
697STATIC int
698xfs_ioc_getbmap(
699 bhv_desc_t *bdp,
700 struct file *filp,
701 int flags,
702 unsigned int cmd,
703 void __user *arg);
704
705STATIC int
706xfs_ioc_getbmapx(
707 bhv_desc_t *bdp,
708 void __user *arg);
709
710int
711xfs_ioctl(
712 bhv_desc_t *bdp,
713 struct inode *inode,
714 struct file *filp,
715 int ioflags,
716 unsigned int cmd,
717 void __user *arg)
718{
719 int error;
720 vnode_t *vp;
721 xfs_inode_t *ip;
722 xfs_mount_t *mp;
723
724 vp = LINVFS_GET_VP(inode);
725
726 vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
727
728 ip = XFS_BHVTOI(bdp);
729 mp = ip->i_mount;
730
731 switch (cmd) {
732
733 case XFS_IOC_ALLOCSP:
734 case XFS_IOC_FREESP:
735 case XFS_IOC_RESVSP:
736 case XFS_IOC_UNRESVSP:
737 case XFS_IOC_ALLOCSP64:
738 case XFS_IOC_FREESP64:
739 case XFS_IOC_RESVSP64:
740 case XFS_IOC_UNRESVSP64:
741 /*
742 * Only allow the sys admin to reserve space unless
743 * unwritten extents are enabled.
744 */
745 if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
746 !capable(CAP_SYS_ADMIN))
747 return -EPERM;
748
749 return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
750
751 case XFS_IOC_DIOINFO: {
752 struct dioattr da;
753 xfs_buftarg_t *target =
754 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
755 mp->m_rtdev_targp : mp->m_ddev_targp;
756
757 da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
758 /* The size dio will do in one go */
759 da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
760
761 if (copy_to_user(arg, &da, sizeof(da)))
762 return -XFS_ERROR(EFAULT);
763 return 0;
764 }
765
766 case XFS_IOC_FSBULKSTAT_SINGLE:
767 case XFS_IOC_FSBULKSTAT:
768 case XFS_IOC_FSINUMBERS:
769 return xfs_ioc_bulkstat(mp, cmd, arg);
770
771 case XFS_IOC_FSGEOMETRY_V1:
772 return xfs_ioc_fsgeometry_v1(mp, arg);
773
774 case XFS_IOC_FSGEOMETRY:
775 return xfs_ioc_fsgeometry(mp, arg);
776
777 case XFS_IOC_GETVERSION:
778 case XFS_IOC_GETXFLAGS:
779 case XFS_IOC_SETXFLAGS:
780 case XFS_IOC_FSGETXATTR:
781 case XFS_IOC_FSSETXATTR:
782 case XFS_IOC_FSGETXATTRA:
783 return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
784
785 case XFS_IOC_FSSETDM: {
786 struct fsdmidata dmi;
787
788 if (copy_from_user(&dmi, arg, sizeof(dmi)))
789 return -XFS_ERROR(EFAULT);
790
791 error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
792 NULL);
793 return -error;
794 }
795
796 case XFS_IOC_GETBMAP:
797 case XFS_IOC_GETBMAPA:
798 return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
799
800 case XFS_IOC_GETBMAPX:
801 return xfs_ioc_getbmapx(bdp, arg);
802
803 case XFS_IOC_FD_TO_HANDLE:
804 case XFS_IOC_PATH_TO_HANDLE:
805 case XFS_IOC_PATH_TO_FSHANDLE:
806 return xfs_find_handle(cmd, arg);
807
808 case XFS_IOC_OPEN_BY_HANDLE:
809 return xfs_open_by_handle(mp, arg, filp, inode);
810
811 case XFS_IOC_FSSETDM_BY_HANDLE:
812 return xfs_fssetdm_by_handle(mp, arg, filp, inode);
813
814 case XFS_IOC_READLINK_BY_HANDLE:
815 return xfs_readlink_by_handle(mp, arg, filp, inode);
816
817 case XFS_IOC_ATTRLIST_BY_HANDLE:
818 return xfs_attrlist_by_handle(mp, arg, filp, inode);
819
820 case XFS_IOC_ATTRMULTI_BY_HANDLE:
821 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
822
823 case XFS_IOC_SWAPEXT: {
824 error = xfs_swapext((struct xfs_swapext __user *)arg);
825 return -error;
826 }
827
828 case XFS_IOC_FSCOUNTS: {
829 xfs_fsop_counts_t out;
830
831 error = xfs_fs_counts(mp, &out);
832 if (error)
833 return -error;
834
835 if (copy_to_user(arg, &out, sizeof(out)))
836 return -XFS_ERROR(EFAULT);
837 return 0;
838 }
839
840 case XFS_IOC_SET_RESBLKS: {
841 xfs_fsop_resblks_t inout;
842 __uint64_t in;
843
844 if (!capable(CAP_SYS_ADMIN))
845 return -EPERM;
846
847 if (copy_from_user(&inout, arg, sizeof(inout)))
848 return -XFS_ERROR(EFAULT);
849
850 /* input parameter is passed in resblks field of structure */
851 in = inout.resblks;
852 error = xfs_reserve_blocks(mp, &in, &inout);
853 if (error)
854 return -error;
855
856 if (copy_to_user(arg, &inout, sizeof(inout)))
857 return -XFS_ERROR(EFAULT);
858 return 0;
859 }
860
861 case XFS_IOC_GET_RESBLKS: {
862 xfs_fsop_resblks_t out;
863
864 if (!capable(CAP_SYS_ADMIN))
865 return -EPERM;
866
867 error = xfs_reserve_blocks(mp, NULL, &out);
868 if (error)
869 return -error;
870
871 if (copy_to_user(arg, &out, sizeof(out)))
872 return -XFS_ERROR(EFAULT);
873
874 return 0;
875 }
876
877 case XFS_IOC_FSGROWFSDATA: {
878 xfs_growfs_data_t in;
879
880 if (!capable(CAP_SYS_ADMIN))
881 return -EPERM;
882
883 if (copy_from_user(&in, arg, sizeof(in)))
884 return -XFS_ERROR(EFAULT);
885
886 error = xfs_growfs_data(mp, &in);
887 return -error;
888 }
889
890 case XFS_IOC_FSGROWFSLOG: {
891 xfs_growfs_log_t in;
892
893 if (!capable(CAP_SYS_ADMIN))
894 return -EPERM;
895
896 if (copy_from_user(&in, arg, sizeof(in)))
897 return -XFS_ERROR(EFAULT);
898
899 error = xfs_growfs_log(mp, &in);
900 return -error;
901 }
902
903 case XFS_IOC_FSGROWFSRT: {
904 xfs_growfs_rt_t in;
905
906 if (!capable(CAP_SYS_ADMIN))
907 return -EPERM;
908
909 if (copy_from_user(&in, arg, sizeof(in)))
910 return -XFS_ERROR(EFAULT);
911
912 error = xfs_growfs_rt(mp, &in);
913 return -error;
914 }
915
916 case XFS_IOC_FREEZE:
917 if (!capable(CAP_SYS_ADMIN))
918 return -EPERM;
919
920 if (inode->i_sb->s_frozen == SB_UNFROZEN)
921 freeze_bdev(inode->i_sb->s_bdev);
922 return 0;
923
924 case XFS_IOC_THAW:
925 if (!capable(CAP_SYS_ADMIN))
926 return -EPERM;
927 if (inode->i_sb->s_frozen != SB_UNFROZEN)
928 thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
929 return 0;
930
931 case XFS_IOC_GOINGDOWN: {
932 __uint32_t in;
933
934 if (!capable(CAP_SYS_ADMIN))
935 return -EPERM;
936
937 if (get_user(in, (__uint32_t __user *)arg))
938 return -XFS_ERROR(EFAULT);
939
940 error = xfs_fs_goingdown(mp, in);
941 return -error;
942 }
943
944 case XFS_IOC_ERROR_INJECTION: {
945 xfs_error_injection_t in;
946
947 if (!capable(CAP_SYS_ADMIN))
948 return -EPERM;
949
950 if (copy_from_user(&in, arg, sizeof(in)))
951 return -XFS_ERROR(EFAULT);
952
953 error = xfs_errortag_add(in.errtag, mp);
954 return -error;
955 }
956
957 case XFS_IOC_ERROR_CLEARALL:
958 if (!capable(CAP_SYS_ADMIN))
959 return -EPERM;
960
961 error = xfs_errortag_clearall(mp);
962 return -error;
963
964 default:
965 return -ENOTTY;
966 }
967}
968
969STATIC int
970xfs_ioc_space(
971 bhv_desc_t *bdp,
972 vnode_t *vp,
973 struct file *filp,
974 int ioflags,
975 unsigned int cmd,
976 void __user *arg)
977{
978 xfs_flock64_t bf;
979 int attr_flags = 0;
980 int error;
981
982 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
983 return -XFS_ERROR(EPERM);
984
985 if (!(filp->f_flags & FMODE_WRITE))
986 return -XFS_ERROR(EBADF);
987
988 if (vp->v_type != VREG)
989 return -XFS_ERROR(EINVAL);
990
991 if (copy_from_user(&bf, arg, sizeof(bf)))
992 return -XFS_ERROR(EFAULT);
993
994 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
995 attr_flags |= ATTR_NONBLOCK;
996 if (ioflags & IO_INVIS)
997 attr_flags |= ATTR_DMI;
998
999 error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos,
1000 NULL, attr_flags);
1001 return -error;
1002}
1003
/*
 * Service XFS_IOC_FSBULKSTAT, XFS_IOC_FSBULKSTAT_SINGLE and
 * XFS_IOC_FSINUMBERS: batched inode stat / inode-number enumeration.
 * Copies the request header in, dispatches on cmd, and writes the
 * updated cursor and output count back to user space.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_bulkstat(
	xfs_mount_t		*mp,
	unsigned int		cmd,
	void __user		*arg)
{
	xfs_fsop_bulkreq_t	bulkreq;
	int			count;	/* # of records returned */
	xfs_ino_t		inlast;	/* last inode number */
	int			done;
	int			error;

	/* done = 1 if there are more stats to get and if bulkstat */
	/* should be called again (unused here, but used in dmapi) */

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
		return -XFS_ERROR(EFAULT);

	/* the cursor itself lives behind a user pointer in the request */
	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
		return -XFS_ERROR(EFAULT);

	if ((count = bulkreq.icount) <= 0)
		return -XFS_ERROR(EINVAL);

	if (cmd == XFS_IOC_FSINUMBERS)
		error = xfs_inumbers(mp, &inlast, &count,
						bulkreq.ubuffer);
	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
		error = xfs_bulkstat_single(mp, &inlast,
						bulkreq.ubuffer, &done);
	else {	/* XFS_IOC_FSBULKSTAT */
		/*
		 * A one-record request with a nonzero cursor means
		 * "stat the inode following inlast", so advance the
		 * cursor and use the cheaper single-inode path.
		 */
		if (count == 1 && inlast != 0) {
			inlast++;
			error = xfs_bulkstat_single(mp, &inlast,
					bulkreq.ubuffer, &done);
		} else {
			error = xfs_bulkstat(mp, &inlast, &count,
				(bulkstat_one_pf)xfs_bulkstat_one, NULL,
				sizeof(xfs_bstat_t), bulkreq.ubuffer,
				BULKSTAT_FG_QUICK, &done);
		}
	}

	if (error)
		return -error;

	/*
	 * NOTE(review): the lastip write-back is guarded by ocount being
	 * non-NULL, so a caller passing ocount == NULL never sees the
	 * advanced cursor — confirm this asymmetry is intended.
	 */
	if (bulkreq.ocount != NULL) {
		if (copy_to_user(bulkreq.lastip, &inlast,
						sizeof(xfs_ino_t)))
			return -XFS_ERROR(EFAULT);

		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
			return -XFS_ERROR(EFAULT);
	}

	return 0;
}
1067
/*
 * XFS_IOC_FSGEOMETRY_V1: report filesystem geometry in the original
 * (smaller) v1 structure layout.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_fsgeometry_v1(
	xfs_mount_t		*mp,
	void __user		*arg)
{
	xfs_fsop_geom_v1_t	fsgeo;
	int			error;

	/*
	 * Version 3 fills only the v1-sized portion of the full
	 * xfs_fsop_geom_t, so handing xfs_fs_geometry() a pointer to
	 * the shorter v1 struct is safe — assumes the v1 layout is an
	 * exact prefix of the full struct; confirm against xfs_fs.h.
	 */
	error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
	if (error)
		return -error;

	if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
		return -XFS_ERROR(EFAULT);
	return 0;
}
1084
1085STATIC int
1086xfs_ioc_fsgeometry(
1087 xfs_mount_t *mp,
1088 void __user *arg)
1089{
1090 xfs_fsop_geom_t fsgeo;
1091 int error;
1092
1093 error = xfs_fs_geometry(mp, &fsgeo, 4);
1094 if (error)
1095 return -error;
1096
1097 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
1098 return -XFS_ERROR(EFAULT);
1099 return 0;
1100}
1101
1102/*
1103 * Linux extended inode flags interface.
1104 */
1105#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
1106#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
1107#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
1108#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
1109#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
1110
1111STATIC unsigned int
1112xfs_merge_ioc_xflags(
1113 unsigned int flags,
1114 unsigned int start)
1115{
1116 unsigned int xflags = start;
1117
1118 if (flags & LINUX_XFLAG_IMMUTABLE)
1119 xflags |= XFS_XFLAG_IMMUTABLE;
1120 else
1121 xflags &= ~XFS_XFLAG_IMMUTABLE;
1122 if (flags & LINUX_XFLAG_APPEND)
1123 xflags |= XFS_XFLAG_APPEND;
1124 else
1125 xflags &= ~XFS_XFLAG_APPEND;
1126 if (flags & LINUX_XFLAG_SYNC)
1127 xflags |= XFS_XFLAG_SYNC;
1128 else
1129 xflags &= ~XFS_XFLAG_SYNC;
1130 if (flags & LINUX_XFLAG_NOATIME)
1131 xflags |= XFS_XFLAG_NOATIME;
1132 else
1133 xflags &= ~XFS_XFLAG_NOATIME;
1134 if (flags & LINUX_XFLAG_NODUMP)
1135 xflags |= XFS_XFLAG_NODUMP;
1136 else
1137 xflags &= ~XFS_XFLAG_NODUMP;
1138
1139 return xflags;
1140}
1141
1142STATIC unsigned int
1143xfs_di2lxflags(
1144 __uint16_t di_flags)
1145{
1146 unsigned int flags = 0;
1147
1148 if (di_flags & XFS_DIFLAG_IMMUTABLE)
1149 flags |= LINUX_XFLAG_IMMUTABLE;
1150 if (di_flags & XFS_DIFLAG_APPEND)
1151 flags |= LINUX_XFLAG_APPEND;
1152 if (di_flags & XFS_DIFLAG_SYNC)
1153 flags |= LINUX_XFLAG_SYNC;
1154 if (di_flags & XFS_DIFLAG_NOATIME)
1155 flags |= LINUX_XFLAG_NOATIME;
1156 if (di_flags & XFS_DIFLAG_NODUMP)
1157 flags |= LINUX_XFLAG_NODUMP;
1158 return flags;
1159}
1160
/*
 * Service the extended-attribute/flags family of ioctls
 * (FSGETXATTR/FSSETXATTR/FSGETXATTRA, GETXFLAGS/SETXFLAGS,
 * GETVERSION) by translating between the user-visible structures and
 * vattr_t attribute get/set operations on the vnode.
 *
 * Note: VOP_GETATTR/VOP_SETATTR are statement macros that assign to
 * the trailing 'error' argument rather than returning a value.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_xattr(
	vnode_t			*vp,
	xfs_inode_t		*ip,
	struct file		*filp,
	unsigned int		cmd,
	void __user		*arg)
{
	struct fsxattr		fa;
	vattr_t			va;
	int			error;
	int			attr_flags;
	unsigned int		flags;

	switch (cmd) {
	case XFS_IOC_FSGETXATTR: {
		/* report xflags, extent size hint and data-fork extent count */
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_nextents;

		if (copy_to_user(arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSSETXATTR: {
		if (copy_from_user(&fa, arg, sizeof(fa)))
			return -XFS_ERROR(EFAULT);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
		va.va_xflags  = fa.fsx_xflags;
		va.va_extsize = fa.fsx_extsize;

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_FSGETXATTRA: {
		/* same as FSGETXATTR but reports the attr-fork extent count */
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_anextents;

		if (copy_to_user(arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GETXFLAGS: {
		flags = xfs_di2lxflags(ip->i_d.di_flags);
		if (copy_to_user(arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SETXFLAGS: {
		if (copy_from_user(&flags, arg, sizeof(flags)))
			return -XFS_ERROR(EFAULT);

		/* reject any Linux flag bits XFS cannot represent */
		if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
			      LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
			      LINUX_XFLAG_SYNC))
			return -XFS_ERROR(EOPNOTSUPP);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		/* merge onto the current xflags so unrelated bits survive */
		va.va_mask = XFS_AT_XFLAGS;
		va.va_xflags = xfs_merge_ioc_xflags(flags,
				xfs_ip2xflags(ip));

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_GETVERSION: {
		flags = LINVFS_GET_IP(vp)->i_generation;
		if (copy_to_user(arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	default:
		return -ENOTTY;
	}
}
1265
/*
 * XFS_IOC_GETBMAP/GETBMAPA: return the file's block mapping.  The user
 * buffer starts with one struct getbmap header followed by bmv_count-1
 * result records, which is why the result pointer passed down is arg+1.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_getbmap(
	bhv_desc_t		*bdp,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	void __user		*arg)
{
	struct getbmap		bm;
	int			iflags;
	int			error;

	if (copy_from_user(&bm, arg, sizeof(bm)))
		return -XFS_ERROR(EFAULT);

	/* need room for the header plus at least one result record */
	if (bm.bmv_count < 2)
		return -XFS_ERROR(EINVAL);

	iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
	if (ioflags & IO_INVIS)
		iflags |= BMV_IF_NO_DMAPI_READ;

	error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags);
	if (error)
		return -error;

	/* write the updated header (cursor/entry count) back */
	if (copy_to_user(arg, &bm, sizeof(bm)))
		return -XFS_ERROR(EFAULT);
	return 0;
}
1296
/*
 * XFS_IOC_GETBMAPX: extended block-map query.  Converts the getbmapx
 * header to the plain getbmap form xfs_getbmap() works with, then
 * converts the updated header back before copying it out.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_ioc_getbmapx(
	bhv_desc_t		*bdp,
	void __user		*arg)
{
	struct getbmapx		bmx;
	struct getbmap		bm;
	int			iflags;
	int			error;

	if (copy_from_user(&bmx, arg, sizeof(bmx)))
		return -XFS_ERROR(EFAULT);

	/* need room for the header plus at least one result record */
	if (bmx.bmv_count < 2)
		return -XFS_ERROR(EINVAL);

	/*
	 * Map input getbmapx structure to a getbmap
	 * structure for xfs_getbmap.
	 */
	GETBMAP_CONVERT(bmx, bm);

	iflags = bmx.bmv_iflags;

	if (iflags & (~BMV_IF_VALID))
		return -XFS_ERROR(EINVAL);

	iflags |= BMV_IF_EXTENDED;

	/* results land after the getbmapx-sized header in the user buffer */
	error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags);
	if (error)
		return -error;

	GETBMAP_CONVERT(bm, bmx);

	if (copy_to_user(arg, &bmx, sizeof(bmx)))
		return -XFS_ERROR(EFAULT);

	return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
new file mode 100644
index 000000000000..7a12c83184f5
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <linux/config.h>
34#include <linux/compat.h>
35#include <linux/init.h>
36#include <linux/ioctl.h>
37#include <linux/ioctl32.h>
38#include <linux/syscalls.h>
39#include <linux/types.h>
40#include <linux/fs.h>
41#include <asm/uaccess.h>
42
43#include "xfs.h"
44#include "xfs_types.h"
45#include "xfs_fs.h"
46#include "xfs_vfs.h"
47#include "xfs_vnode.h"
48#include "xfs_dfrag.h"
49
50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
51#define BROKEN_X86_ALIGNMENT
52#else
53
/*
 * 32-bit layout of xfs_fsop_bulkreq_t: the user pointers shrink to
 * compat_uptr_t, so the structure must be widened before handing it
 * to the native bulkstat ioctl path.
 */
typedef struct xfs_fsop_bulkreq32 {
	compat_uptr_t	lastip;		/* last inode # pointer */
	__s32		icount;		/* count of entries in buffer */
	compat_uptr_t	ubuffer;	/* user buffer for inode desc. */
	__s32		ocount;		/* output count pointer */
} xfs_fsop_bulkreq32_t;
60
/*
 * Widen a 32-bit xfs_fsop_bulkreq32 into a native xfs_fsop_bulkreq on
 * the compat user stack area and return the address of the new copy,
 * which replaces 'arg' for the native ioctl handler.
 *
 * NOTE(review): on a faulting copy this returns -EFAULT folded into an
 * unsigned long; the caller in __xfs_compat_ioctl() does not check for
 * that, so the bogus value is passed on as a user pointer (the native
 * handler's own copy_from_user then fails with EFAULT) — confirm this
 * indirection is acceptable.
 */
static unsigned long
xfs_ioctl32_bulkstat(unsigned long arg)
{
	xfs_fsop_bulkreq32_t	__user *p32 = (void __user *)arg;
	xfs_fsop_bulkreq_t	__user *p = compat_alloc_user_space(sizeof(*p));
	u32			addr;

	if (get_user(addr, &p32->lastip) ||
	    put_user(compat_ptr(addr), &p->lastip) ||
	    copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
	    get_user(addr, &p32->ubuffer) ||
	    put_user(compat_ptr(addr), &p->ubuffer) ||
	    get_user(addr, &p32->ocount) ||
	    put_user(compat_ptr(addr), &p->ocount))
		return -EFAULT;

	return (unsigned long)p;
}
79#endif
80
/*
 * Common 32-bit-compat ioctl dispatcher.  Commands whose structures
 * have identical layout on 32- and 64-bit kernels fall straight
 * through to the native VOP_IOCTL; on architectures without the x86
 * alignment mismatch a few more commands are passed through, with the
 * bulkstat family repacked first.  Unknown commands get -ENOIOCTLCMD
 * so the generic compat code can try its own tables.
 */
static long
__xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
{
	int		error;
	struct inode	*inode = f->f_dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);

	switch (cmd) {
	/* layout-compatible on all architectures: pass straight through */
	case XFS_IOC_DIOINFO:
	case XFS_IOC_FSGEOMETRY_V1:
	case XFS_IOC_FSGEOMETRY:
	case XFS_IOC_GETVERSION:
	case XFS_IOC_GETXFLAGS:
	case XFS_IOC_SETXFLAGS:
	case XFS_IOC_FSGETXATTR:
	case XFS_IOC_FSSETXATTR:
	case XFS_IOC_FSGETXATTRA:
	case XFS_IOC_FSSETDM:
	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
	case XFS_IOC_GETBMAPX:
/* not handled
	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE:
	case XFS_IOC_OPEN_BY_HANDLE:
	case XFS_IOC_FSSETDM_BY_HANDLE:
	case XFS_IOC_READLINK_BY_HANDLE:
	case XFS_IOC_ATTRLIST_BY_HANDLE:
	case XFS_IOC_ATTRMULTI_BY_HANDLE:
*/
	case XFS_IOC_FSCOUNTS:
	case XFS_IOC_SET_RESBLKS:
	case XFS_IOC_GET_RESBLKS:
	case XFS_IOC_FSGROWFSDATA:
	case XFS_IOC_FSGROWFSLOG:
	case XFS_IOC_FSGROWFSRT:
	case XFS_IOC_FREEZE:
	case XFS_IOC_THAW:
	case XFS_IOC_GOINGDOWN:
	case XFS_IOC_ERROR_INJECTION:
	case XFS_IOC_ERROR_CLEARALL:
		break;

#ifndef BROKEN_X86_ALIGNMENT
	/* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
	case XFS_IOC_SWAPEXT:
		break;

	case XFS_IOC_FSBULKSTAT_SINGLE:
	case XFS_IOC_FSBULKSTAT:
	case XFS_IOC_FSINUMBERS:
		/* repack the 32-bit request; see note on the helper about
		 * its unchecked -EFAULT return */
		arg = xfs_ioctl32_bulkstat(arg);
		break;
#endif
	default:
		return -ENOIOCTLCMD;
	}

	VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
	/* NOTE(review): the vnode is marked modified even when the ioctl
	 * failed — confirm unconditional VMODIFY is intended here */
	VMODIFY(vp);

	return error;
}
154
/* 32-bit compat ioctl entry point for regular (visible) file access. */
long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
{
	return __xfs_compat_ioctl(0, f, cmd, arg);
}
159
/*
 * 32-bit compat ioctl entry point for "invisible" (DMAPI) access:
 * same dispatch, but with IO_INVIS so timestamps are not updated.
 */
long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg)
{
	return __xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
new file mode 100644
index 000000000000..779f69a48116
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
34long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
new file mode 100644
index 000000000000..407e99359391
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -0,0 +1,680 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_fs.h"
35#include "xfs_inum.h"
36#include "xfs_log.h"
37#include "xfs_trans.h"
38#include "xfs_sb.h"
39#include "xfs_ag.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_quota.h"
45#include "xfs_mount.h"
46#include "xfs_alloc_btree.h"
47#include "xfs_bmap_btree.h"
48#include "xfs_ialloc_btree.h"
49#include "xfs_btree.h"
50#include "xfs_ialloc.h"
51#include "xfs_attr_sf.h"
52#include "xfs_dir_sf.h"
53#include "xfs_dir2_sf.h"
54#include "xfs_dinode.h"
55#include "xfs_inode.h"
56#include "xfs_bmap.h"
57#include "xfs_bit.h"
58#include "xfs_rtalloc.h"
59#include "xfs_error.h"
60#include "xfs_itable.h"
61#include "xfs_rw.h"
62#include "xfs_acl.h"
63#include "xfs_cap.h"
64#include "xfs_mac.h"
65#include "xfs_attr.h"
66#include "xfs_buf_item.h"
67#include "xfs_utils.h"
68
69#include <linux/xattr.h>
70#include <linux/namei.h>
71
72
/*
 * Pull the link count, block count and size up from the xfs inode to
 * the linux inode after a namespace operation may have changed them.
 * Errors from the getattr are deliberately ignored (best effort).
 */
STATIC void
validate_fields(
	struct inode	*ip)
{
	vnode_t		*vp = LINVFS_GET_VP(ip);
	vattr_t		va;
	int		error;

	va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
	VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
	if (likely(!error)) {
		ip->i_nlink = va.va_nlink;
		ip->i_blocks = va.va_nblocks;

		/* we're under i_sem so i_size can't change under us */
		if (i_size_read(ip) != va.va_size)
			i_size_write(ip, va.va_size);
	}
}
95
96/*
97 * Determine whether a process has a valid fs_struct (kernel daemons
98 * like knfsd don't have an fs_struct).
99 *
100 * XXX(hch): nfsd is broken, better fix it instead.
101 */
102STATIC inline int
103has_fs_struct(struct task_struct *task)
104{
105 return (task->fs != init_task.fs);
106}
107
/*
 * ->mknod (also backs ->create and ->mkdir): create a file, directory,
 * or special node, inheriting a default ACL from the parent when one
 * exists.  If attaching the inherited ACL fails, the freshly created
 * object is torn down again so no half-initialized inode survives.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	dev_t		rdev)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
	xfs_acl_t	*default_acl = NULL;
	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
		return -EINVAL;

	/* snapshot the parent's default ACL, if any, before creating */
	if (test_default_acl && test_default_acl(dvp)) {
		if (!_ACL_ALLOC(default_acl))
			return -ENOMEM;
		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
			_ACL_FREE(default_acl);
			default_acl = NULL;
		}
	}

	/* no default ACL: apply the process umask ourselves */
	if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
		mode &= ~current->fs->umask;

	memset(&va, 0, sizeof(va));
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
	va.va_type = IFTOVT(mode);
	va.va_mode = mode;

	switch (mode & S_IFMT) {
	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
		va.va_rdev = sysv_encode_dev(rdev);
		va.va_mask |= XFS_AT_RDEV;
		/*FALLTHROUGH*/
	case S_IFREG:
		VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
		break;
	case S_IFDIR:
		VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (default_acl) {
		if (!error) {
			error = _ACL_INHERIT(vp, &va, default_acl);
			if (!error) {
				VMODIFY(vp);
			} else {
				struct dentry	teardown = {};
				int		err2;

				/* Oh, the horror.
				 * If we can't add the ACL we must back out.
				 * ENOSPC can hit here, among other things.
				 */
				teardown.d_inode = ip = LINVFS_GET_IP(vp);
				teardown.d_name = dentry->d_name;

				vn_mark_bad(vp);

				if (S_ISDIR(mode))
					VOP_RMDIR(dvp, &teardown, NULL, err2);
				else
					VOP_REMOVE(dvp, &teardown, NULL, err2);
				VN_RELE(vp);
			}
		}
		_ACL_FREE(default_acl);
	}

	if (!error) {
		ASSERT(vp);
		ip = LINVFS_GET_IP(vp);

		if (S_ISCHR(mode) || S_ISBLK(mode))
			ip->i_rdev = rdev;
		else if (S_ISDIR(mode))
			validate_fields(ip);
		d_instantiate(dentry, ip);
		validate_fields(dir);
	}
	return -error;
}
203
/* ->create: a regular-file create is just mknod with no device. */
STATIC int
linvfs_create(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	struct nameidata *nd)
{
	return linvfs_mknod(dir, dentry, mode, 0);
}
213
/* ->mkdir: delegate to mknod with the directory type bit forced on. */
STATIC int
linvfs_mkdir(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode)
{
	return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
}
222
/*
 * ->lookup: resolve a name in a directory.  A plain ENOENT becomes a
 * cached negative dentry; any other error is propagated.
 */
STATIC struct dentry *
linvfs_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
	struct nameidata *nd)
{
	struct vnode	*vp = LINVFS_GET_VP(dir), *cvp;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
	if (error) {
		if (unlikely(error != ENOENT))
			return ERR_PTR(-error);
		d_add(dentry, NULL);	/* negative dentry */
		return NULL;
	}

	/* reuse an existing alias for the inode if the dcache has one */
	return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
}
245
/*
 * ->link: create a hard link.  On success take an extra vnode hold for
 * the new dentry reference and refresh the inode's link count.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_link(
	struct dentry	*old_dentry,
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*ip;	/* inode of guy being linked to */
	vnode_t		*tdvp;	/* target directory for new name/link */
	vnode_t		*vp;	/* vp of name being linked */
	int		error;

	ip = old_dentry->d_inode;	/* inode being linked to */
	if (S_ISDIR(ip->i_mode))
		return -EPERM;		/* no hard links to directories */

	tdvp = LINVFS_GET_VP(dir);
	vp = LINVFS_GET_VP(ip);

	VOP_LINK(tdvp, vp, dentry, NULL, error);
	if (!error) {
		VMODIFY(tdvp);
		VN_HOLD(vp);
		validate_fields(ip);
		d_instantiate(dentry, ip);
	}
	return -error;
}
273
/*
 * ->unlink: remove a name; on success refresh the parent's size and
 * the victim's link count.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_unlink(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*inode;
	vnode_t		*dvp;	/* directory containing name to remove */
	int		error;

	inode = dentry->d_inode;
	dvp = LINVFS_GET_VP(dir);

	VOP_REMOVE(dvp, dentry, NULL, error);
	if (!error) {
		validate_fields(dir);	/* For size only */
		validate_fields(inode);
	}

	return -error;
}
294
/*
 * ->symlink: create a symbolic link.  The mode honours the process
 * umask when the irix_symlink_mode sysctl is set, matching IRIX
 * behaviour; otherwise links are created 0777.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*dvp;	/* directory containing name of symlink */
	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
	int		error;

	dvp = LINVFS_GET_VP(dir);
	cvp = NULL;

	memset(&va, 0, sizeof(va));
	va.va_type = VLNK;
	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;

	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (!error && cvp) {
		ASSERT(cvp->v_type == VLNK);
		ip = LINVFS_GET_IP(cvp);
		d_instantiate(dentry, ip);
		validate_fields(dir);
		validate_fields(ip);	/* size needs update */
	}
	return -error;
}
326
/*
 * ->rmdir: remove a directory; on success refresh both the parent and
 * the removed directory's cached link/size fields.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_rmdir(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*dvp = LINVFS_GET_VP(dir);
	int		error;

	VOP_RMDIR(dvp, dentry, NULL, error);
	if (!error) {
		validate_fields(inode);
		validate_fields(dir);
	}
	return -error;
}
343
/*
 * ->rename: move a name between directories; on success refresh the
 * affected inodes' cached fields (the displaced target, if any, may
 * have lost a link).
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_rename(
	struct inode	*odir,
	struct dentry	*odentry,
	struct inode	*ndir,
	struct dentry	*ndentry)
{
	struct inode	*new_inode = ndentry->d_inode;
	vnode_t		*fvp;	/* from directory */
	vnode_t		*tvp;	/* target directory */
	int		error;

	fvp = LINVFS_GET_VP(odir);
	tvp = LINVFS_GET_VP(ndir);

	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
	if (error)
		return -error;

	if (new_inode)
		validate_fields(new_inode);

	validate_fields(odir);
	if (ndir != odir)
		validate_fields(ndir);
	return 0;
}
371
372/*
373 * careful here - this function can get called recursively, so
374 * we need to be very careful about how much stack we use.
375 * uio is kmalloced for this reason...
376 */
377STATIC int
378linvfs_follow_link(
379 struct dentry *dentry,
380 struct nameidata *nd)
381{
382 vnode_t *vp;
383 uio_t *uio;
384 iovec_t iov;
385 int error;
386 char *link;
387
388 ASSERT(dentry);
389 ASSERT(nd);
390
391 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
392 if (!link) {
393 nd_set_link(nd, ERR_PTR(-ENOMEM));
394 return 0;
395 }
396
397 uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
398 if (!uio) {
399 kfree(link);
400 nd_set_link(nd, ERR_PTR(-ENOMEM));
401 return 0;
402 }
403
404 vp = LINVFS_GET_VP(dentry->d_inode);
405
406 iov.iov_base = link;
407 iov.iov_len = MAXNAMELEN;
408
409 uio->uio_iov = &iov;
410 uio->uio_offset = 0;
411 uio->uio_segflg = UIO_SYSSPACE;
412 uio->uio_resid = MAXNAMELEN;
413 uio->uio_iovcnt = 1;
414
415 VOP_READLINK(vp, uio, 0, NULL, error);
416 if (error) {
417 kfree(link);
418 link = ERR_PTR(-error);
419 } else {
420 link[MAXNAMELEN - uio->uio_resid] = '\0';
421 }
422 kfree(uio);
423
424 nd_set_link(nd, link);
425 return 0;
426}
427
/*
 * ->put_link: release the buffer handed to the VFS by
 * linvfs_follow_link(); error sentinels were never allocated.
 */
static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd)
{
	char *link = nd_get_link(nd);

	if (IS_ERR(link))
		return;
	kfree(link);
}
434
#ifdef CONFIG_XFS_POSIX_ACL
/*
 * ->permission: have XFS evaluate access (including POSIX ACLs).
 * Only wired up when ACL support is configured; otherwise the table
 * entry is NULL and the VFS performs its generic mode-bit check.
 *
 * Returns 0 if access is allowed or a negative errno.
 */
STATIC int
linvfs_permission(
	struct inode	*inode,
	int		mode,
	struct nameidata *nd)
{
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error;

	mode <<= 6;		/* convert from linux to vnode access bits */
	VOP_ACCESS(vp, mode, NULL, error);
	return -error;
}
#else
#define linvfs_permission NULL
#endif
452
/*
 * ->getattr: revalidate the Linux inode from XFS if it has been marked
 * modified, then fill the kstat from the inode.
 *
 * NOTE(review): a failing vn_revalidate() is silently discarded and
 * stat still succeeds with possibly stale fields — confirm whether the
 * error should be propagated instead of returning 0 unconditionally.
 */
STATIC int
linvfs_getattr(
	struct vfsmount	*mnt,
	struct dentry	*dentry,
	struct kstat	*stat)
{
	struct inode	*inode = dentry->d_inode;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	int		error = 0;

	if (unlikely(vp->v_flag & VMODIFIED))
		error = vn_revalidate(vp);
	if (!error)
		generic_fillattr(inode, stat);
	return 0;
}
469
/*
 * ->setattr: translate a Linux iattr change-set into a vattr_t and
 * apply it through VOP_SETATTR, then revalidate the Linux inode so its
 * cached fields match the new XFS state.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_setattr(
	struct dentry	*dentry,
	struct iattr	*attr)
{
	struct inode	*inode = dentry->d_inode;
	unsigned int	ia_valid = attr->ia_valid;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	vattr_t		vattr;
	int		flags = 0;
	int		error;

	memset(&vattr, 0, sizeof(vattr_t));
	if (ia_valid & ATTR_UID) {
		vattr.va_mask |= XFS_AT_UID;
		vattr.va_uid = attr->ia_uid;
	}
	if (ia_valid & ATTR_GID) {
		vattr.va_mask |= XFS_AT_GID;
		vattr.va_gid = attr->ia_gid;
	}
	if (ia_valid & ATTR_SIZE) {
		vattr.va_mask |= XFS_AT_SIZE;
		vattr.va_size = attr->ia_size;
	}
	if (ia_valid & ATTR_ATIME) {
		vattr.va_mask |= XFS_AT_ATIME;
		vattr.va_atime = attr->ia_atime;
	}
	if (ia_valid & ATTR_MTIME) {
		vattr.va_mask |= XFS_AT_MTIME;
		vattr.va_mtime = attr->ia_mtime;
	}
	if (ia_valid & ATTR_CTIME) {
		vattr.va_mask |= XFS_AT_CTIME;
		vattr.va_ctime = attr->ia_ctime;
	}
	if (ia_valid & ATTR_MODE) {
		vattr.va_mask |= XFS_AT_MODE;
		vattr.va_mode = attr->ia_mode;
		/* chmod by a non-owner, non-privileged caller drops setgid */
		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
			inode->i_mode &= ~S_ISGID;
	}

	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
		flags |= ATTR_UTIME;
#ifdef ATTR_NO_BLOCK
	if ((ia_valid & ATTR_NO_BLOCK))
		flags |= ATTR_NONBLOCK;
#endif

	VOP_SETATTR(vp, &vattr, flags, NULL, error);
	if (error)
		return -error;
	vn_revalidate(vp);
	/* error is known to be 0 here */
	return error;
}
527
/*
 * ->truncate: zero the tail of the (now) last page after a size change
 * so mmap readers past EOF see zeroes.
 */
STATIC void
linvfs_truncate(
	struct inode	*inode)
{
	block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
}
534
/*
 * ->setxattr: resolve the attribute namespace from the name prefix,
 * check the caller is allowed to touch it, translate the Linux XATTR_*
 * flags into XFS ATTR_* flags, and store the attribute.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_setxattr(
	struct dentry	*dentry,
	const char	*name,
	const void	*data,
	size_t		size,
	int		flags)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	int		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the "user."/"trusted." prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;

	/* Convert Linux syscall to XFS internal ATTR flags */
	if (flags & XATTR_CREATE)
		xflags |= ATTR_CREATE;
	if (flags & XATTR_REPLACE)
		xflags |= ATTR_REPLACE;
	xflags |= namesp->attr_flag;
	return namesp->attr_set(vp, attr, (void *)data, size, xflags);
}
565
/*
 * ->getxattr: resolve the attribute namespace, check permission, and
 * fetch the attribute value.  A zero size means "query the value
 * length only" (ATTR_KERNOVAL).
 *
 * Returns the value length on success or a negative errno.
 */
STATIC ssize_t
linvfs_getxattr(
	struct dentry	*dentry,
	const char	*name,
	void		*data,
	size_t		size)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	ssize_t		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the namespace prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;

	/* Convert Linux syscall to XFS internal ATTR flags */
	if (!size) {
		xflags |= ATTR_KERNOVAL;
		data = NULL;
	}
	xflags |= namesp->attr_flag;
	return namesp->attr_get(vp, attr, (void *)data, size, xflags);
}
595
/*
 * ->listxattr: enumerate attribute names into the caller's buffer.
 * Privileged callers also see trusted/system namespaces
 * (ATTR_KERNFULLS); everyone else gets the user-visible set.
 *
 * Returns the number of bytes of names on success or a negative errno.
 */
STATIC ssize_t
linvfs_listxattr(
	struct dentry		*dentry,
	char			*data,
	size_t			size)
{
	vnode_t			*vp = LINVFS_GET_VP(dentry->d_inode);
	int			error, xflags = ATTR_KERNAMELS;
	ssize_t			result;

	if (!size)
		xflags |= ATTR_KERNOVAL;	/* length probe only */
	xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;

	error = attr_generic_list(vp, data, size, xflags, &result);
	if (error < 0)
		return error;
	return result;
}
615
/*
 * ->removexattr: resolve the attribute namespace, check permission,
 * and delete the named attribute.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
linvfs_removexattr(
	struct dentry	*dentry,
	const char	*name)
{
	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
	char		*attr = (char *)name;
	attrnames_t	*namesp;
	int		xflags = 0;
	int		error;

	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
	if (!namesp)
		return -EOPNOTSUPP;
	attr += namesp->attr_namelen;	/* skip the namespace prefix */
	error = namesp->attr_capable(vp, NULL);
	if (error)
		return error;
	xflags |= namesp->attr_flag;
	return namesp->attr_remove(vp, attr, xflags);
}
637
638
/* Inode operations for regular files. */
struct inode_operations linvfs_file_inode_operations = {
	.permission		= linvfs_permission,
	.truncate		= linvfs_truncate,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
649
/* Inode operations for directories (adds the namespace operations). */
struct inode_operations linvfs_dir_inode_operations = {
	.create			= linvfs_create,
	.lookup			= linvfs_lookup,
	.link			= linvfs_link,
	.unlink			= linvfs_unlink,
	.symlink		= linvfs_symlink,
	.mkdir			= linvfs_mkdir,
	.rmdir			= linvfs_rmdir,
	.mknod			= linvfs_mknod,
	.rename			= linvfs_rename,
	.permission		= linvfs_permission,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
668
/* Inode operations for symbolic links. */
struct inode_operations linvfs_symlink_inode_operations = {
	.readlink		= generic_readlink,
	.follow_link		= linvfs_follow_link,
	.put_link		= linvfs_put_link,
	.permission		= linvfs_permission,
	.getattr		= linvfs_getattr,
	.setattr		= linvfs_setattr,
	.setxattr		= linvfs_setxattr,
	.getxattr		= linvfs_getxattr,
	.listxattr		= linvfs_listxattr,
	.removexattr		= linvfs_removexattr,
};
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
new file mode 100644
index 000000000000..6a69a62c36b0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
#ifndef __XFS_IOPS_H__
#define __XFS_IOPS_H__

/* inode_operations tables (defined in xfs_iops.c) */
extern struct inode_operations linvfs_file_inode_operations;
extern struct inode_operations linvfs_dir_inode_operations;
extern struct inode_operations linvfs_symlink_inode_operations;

/* file_operations tables; "invis" is the invisible-I/O (DMAPI) variant */
extern struct file_operations linvfs_file_operations;
extern struct file_operations linvfs_invis_file_operations;
extern struct file_operations linvfs_dir_operations;

/* address-space operations (defined in xfs_aops.c) */
extern struct address_space_operations linvfs_aops;

/* block mapping and unwritten-extent I/O completion callback */
extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void linvfs_unwritten_done(struct buffer_head *, int);

/* ioctl dispatcher (defined in xfs_ioctl.c) */
extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
			int, unsigned int, void __user *);

#endif	/* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
new file mode 100644
index 000000000000..71bb41019a12
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -0,0 +1,374 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_LINUX__
33#define __XFS_LINUX__
34
35#include <linux/types.h>
36#include <linux/config.h>
37
38/*
39 * Some types are conditional depending on the target system.
40 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
41 * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
42 * as requiring XFS_BIG_BLKNOS to be set.
43 */
44#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
45# define XFS_BIG_BLKNOS 1
46# if BITS_PER_LONG == 64
47# define XFS_BIG_INUMS 1
48# else
49# define XFS_BIG_INUMS 0
50# endif
51#else
52# define XFS_BIG_BLKNOS 0
53# define XFS_BIG_INUMS 0
54#endif
55
56#include <xfs_types.h>
57#include <xfs_arch.h>
58
59#include <kmem.h>
60#include <mrlock.h>
61#include <spin.h>
62#include <sv.h>
63#include <mutex.h>
64#include <sema.h>
65#include <time.h>
66
67#include <support/qsort.h>
68#include <support/ktrace.h>
69#include <support/debug.h>
70#include <support/move.h>
71#include <support/uuid.h>
72
73#include <linux/mm.h>
74#include <linux/kernel.h>
75#include <linux/blkdev.h>
76#include <linux/slab.h>
77#include <linux/module.h>
78#include <linux/file.h>
79#include <linux/swap.h>
80#include <linux/errno.h>
81#include <linux/sched.h>
82#include <linux/bitops.h>
83#include <linux/major.h>
84#include <linux/pagemap.h>
85#include <linux/vfs.h>
86#include <linux/seq_file.h>
87#include <linux/init.h>
88#include <linux/list.h>
89#include <linux/proc_fs.h>
90#include <linux/version.h>
91#include <linux/sort.h>
92
93#include <asm/page.h>
94#include <asm/div64.h>
95#include <asm/param.h>
96#include <asm/uaccess.h>
97#include <asm/byteorder.h>
98#include <asm/unaligned.h>
99
100#include <xfs_behavior.h>
101#include <xfs_vfs.h>
102#include <xfs_cred.h>
103#include <xfs_vnode.h>
104#include <xfs_stats.h>
105#include <xfs_sysctl.h>
106#include <xfs_iops.h>
107#include <xfs_super.h>
108#include <xfs_globals.h>
109#include <xfs_fs_subr.h>
110#include <xfs_lrw.h>
111#include <xfs_buf.h>
112
113/*
114 * Feature macros (disable/enable)
115 */
116#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
117#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
118
119/*
120 * State flag for unwritten extent buffers.
121 *
122 * We need to be able to distinguish between these and delayed
123 * allocate buffers within XFS. The generic IO path code does
124 * not need to distinguish - we use the BH_Delay flag for both
125 * delalloc and these ondisk-uninitialised buffers.
126 */
127BUFFER_FNS(PrivateStart, unwritten);
/*
 * Route this buffer's I/O completion to linvfs_unwritten_done, the
 * handler for writes over unwritten (ondisk-uninitialised) extents --
 * see the BH_PrivateStart/unwritten state flag comment above.
 */
static inline void set_buffer_unwritten_io(struct buffer_head *bh)
{
	bh->b_end_io = linvfs_unwritten_done;
}
132
133#define restricted_chown xfs_params.restrict_chown.val
134#define irix_sgid_inherit xfs_params.sgid_inherit.val
135#define irix_symlink_mode xfs_params.symlink_mode.val
136#define xfs_panic_mask xfs_params.panic_mask.val
137#define xfs_error_level xfs_params.error_level.val
138#define xfs_syncd_centisecs xfs_params.syncd_timer.val
139#define xfs_stats_clear xfs_params.stats_clear.val
140#define xfs_inherit_sync xfs_params.inherit_sync.val
141#define xfs_inherit_nodump xfs_params.inherit_nodump.val
142#define xfs_inherit_noatime xfs_params.inherit_noatim.val
143#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val
144#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val
145#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
146#define xfs_rotorstep xfs_params.rotorstep.val
147
148#ifndef __smp_processor_id
149#define __smp_processor_id() smp_processor_id()
150#endif
151#define current_cpu() __smp_processor_id()
152#define current_pid() (current->pid)
153#define current_fsuid(cred) (current->fsuid)
154#define current_fsgid(cred) (current->fsgid)
155
156#define NBPP PAGE_SIZE
157#define DPPSHFT (PAGE_SHIFT - 9)
158#define NDPP (1 << (PAGE_SHIFT - 9))
159#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT)
160#define dtopt(DD) ((DD) >> DPPSHFT)
161#define dpoff(DD) ((DD) & (NDPP-1))
162
163#define NBBY 8 /* number of bits per byte */
164#define NBPC PAGE_SIZE /* Number of bytes per click */
165#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */
166
167/*
168 * Size of block device i/o is parameterized here.
169 * Currently the system supports page-sized i/o.
170 */
171#define BLKDEV_IOSHIFT BPCSHIFT
172#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT)
173/* number of BB's per block device block */
174#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE)
175
176/* bytes to clicks */
177#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
178#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
179#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
180#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT)
181#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
182#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT)
183
184/* off_t bytes to clicks */
185#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
186#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT)
187
188/* clicks to off_t bytes */
189#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT)
190
191/* clicks to bytes */
192#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT)
193#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
194#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT)
195#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT)
196
197/* bytes to clicks */
198#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
199
200#ifndef CELL_CAPABLE
201#define FSC_NOTIFY_NAME_CHANGED(vp)
202#endif
203
204#ifndef ENOATTR
205#define ENOATTR ENODATA /* Attribute not found */
206#endif
207
208/* Note: EWRONGFS never visible outside the kernel */
209#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
210
211/*
212 * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
213 * return codes out of its known range in errno.
214 * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
215 * conflict with any code we use already or any code a driver may use)
216 * XXX Some options (currently we do #2):
217 * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
218 * 2/ 990 ["Unknown error 990"]
219 * 3/ EUCLEAN ["Structure needs cleaning"]
220 * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
221 */
222#define EFSCORRUPTED 990 /* Filesystem is corrupted */
223
224#define SYNCHRONIZE() barrier()
225#define __return_address __builtin_return_address(0)
226
227/*
228 * IRIX (BSD) quotactl makes use of separate commands for user/group,
229 * whereas on Linux the syscall encodes this information into the cmd
230 * field (see the QCMD macro in quota.h). These macros help keep the
231 * code portable - they are not visible from the syscall interface.
232 */
233#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */
234#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */
235
236/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
237/* we may well need to fine-tune this if it ever becomes an issue. */
238#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */
239#define ndquot DQUOT_MAX_HEURISTIC
240
241/* IRIX uses the current size of the name cache to guess a good value */
242/* - this isn't the same but is a good enough starting point for now. */
243#define DQUOT_HASH_HEURISTIC files_stat.nr_files
244
245/* IRIX inodes maintain the project ID also, zero this field on Linux */
246#define DEFAULT_PROJID 0
247#define dfltprid DEFAULT_PROJID
248
249#define MAXPATHLEN 1024
250
251#define MIN(a,b) (min(a,b))
252#define MAX(a,b) (max(a,b))
253#define howmany(x, y) (((x)+((y)-1))/(y))
254#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
255
256#define xfs_stack_trace() dump_stack()
257
258#define xfs_itruncate_data(ip, off) \
259 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
260
261
262/* Move the kernel do_div definition off to one side */
263
#if defined __i386__
/* For ia32 we need to pull some tricks to get past various versions
 * of the compiler which do not like us using do_div in the middle
 * of large functions.
 */
/*
 * In-place division: divide the 4- or 8-byte integer at *a (width
 * selected by n == sizeof the caller's operand) by b, store the
 * quotient back through a, and return the remainder.  The 64-bit case
 * open-codes a 64/32 division as two chained 32-bit "divl" steps
 * (high word first, then remainder:low) instead of calling do_div.
 */
static inline __u32 xfs_do_div(void *a, __u32 b, int n)
{
	__u32	mod;

	switch (n) {
		case 4:
			mod = *(__u32 *)a % b;
			*(__u32 *)a = *(__u32 *)a / b;
			return mod;
		case 8:
			{
			unsigned long	__upper, __low, __high, __mod;
			__u64	c = *(__u64 *)a;
			__upper = __high = c >> 32;
			__low = c;
			if (__high) {
				/* split: quotient/remainder of the high word */
				__upper = __high % (b);
				__high = __high / (b);
			}
			/* divide (remainder:low) by b: quotient -> __low (eax),
			 * remainder -> __mod (edx) */
			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
			/* reassemble the 64-bit quotient from __high:__low via
			 * the edx:eax "A" constraint (no code emitted) */
			asm("":"=A" (c):"a" (__low),"d" (__high));
			*(__u64 *)a = c;
			return __mod;
			}
	}

	/* NOTREACHED */
	return 0;
}

/* Side effect free 64 bit mod operation: same divl trick as above but
 * *a is left unmodified; only the remainder is returned. */
static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
{
	switch (n) {
		case 4:
			return *(__u32 *)a % b;
		case 8:
			{
			unsigned long	__upper, __low, __high, __mod;
			__u64	c = *(__u64 *)a;
			__upper = __high = c >> 32;
			__low = c;
			if (__high) {
				__upper = __high % (b);
				__high = __high / (b);
			}
			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
			asm("":"=A" (c):"a" (__low),"d" (__high));
			return __mod;
			}
	}

	/* NOTREACHED */
	return 0;
}
#else
/*
 * Non-ia32 variant: same contract as above (divide *a in place by b,
 * return the remainder) but the 64-bit case can simply use the
 * kernel's do_div.
 */
static inline __u32 xfs_do_div(void *a, __u32 b, int n)
{
	__u32	mod;

	switch (n) {
		case 4:
			mod = *(__u32 *)a % b;
			*(__u32 *)a = *(__u32 *)a / b;
			return mod;
		case 8:
			mod = do_div(*(__u64 *)a, b);
			return mod;
	}

	/* NOTREACHED */
	return 0;
}

/* Side effect free 64 bit mod operation */
static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
{
	switch (n) {
		case 4:
			return *(__u32 *)a % b;
		case 8:
			{
			/* operate on a copy so *a is untouched */
			__u64	c = *(__u64 *)a;
			return do_div(c, b);
			}
	}

	/* NOTREACHED */
	return 0;
}
#endif
360
361#undef do_div
362#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
363#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
364
365static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
366{
367 x += y - 1;
368 do_div(x, y);
369 return(x * y);
370}
371
372#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
373
374#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
new file mode 100644
index 000000000000..ff145fd0d1a4
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -0,0 +1,1082 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32/*
33 * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
34 *
35 */
36
37#include "xfs.h"
38
39#include "xfs_fs.h"
40#include "xfs_inum.h"
41#include "xfs_log.h"
42#include "xfs_trans.h"
43#include "xfs_sb.h"
44#include "xfs_ag.h"
45#include "xfs_dir.h"
46#include "xfs_dir2.h"
47#include "xfs_alloc.h"
48#include "xfs_dmapi.h"
49#include "xfs_quota.h"
50#include "xfs_mount.h"
51#include "xfs_alloc_btree.h"
52#include "xfs_bmap_btree.h"
53#include "xfs_ialloc_btree.h"
54#include "xfs_btree.h"
55#include "xfs_ialloc.h"
56#include "xfs_attr_sf.h"
57#include "xfs_dir_sf.h"
58#include "xfs_dir2_sf.h"
59#include "xfs_dinode.h"
60#include "xfs_inode.h"
61#include "xfs_bmap.h"
62#include "xfs_bit.h"
63#include "xfs_rtalloc.h"
64#include "xfs_error.h"
65#include "xfs_itable.h"
66#include "xfs_rw.h"
67#include "xfs_acl.h"
68#include "xfs_cap.h"
69#include "xfs_mac.h"
70#include "xfs_attr.h"
71#include "xfs_inode_item.h"
72#include "xfs_buf_item.h"
73#include "xfs_utils.h"
74#include "xfs_iomap.h"
75
76#include <linux/capability.h>
77#include <linux/writeback.h>
78
79
#if defined(XFS_RW_TRACE)
/*
 * Log a read/write entry point into the per-inode ktrace ring buffer.
 * Silently returns when tracing was never initialised for this inode
 * (i_rwtrace == NULL).  64-bit values (di_size, offset, io_new_size)
 * are split into two 32-bit halves so each fits a void * trace slot
 * on 32-bit platforms.
 */
void
xfs_rw_enter_trace(
	int			tag,
	xfs_iocore_t		*io,
	void			*data,
	size_t			segs,
	loff_t			offset,
	int			ioflags)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(unsigned long)tag,
		(void *)ip,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)data,
		(void *)((unsigned long)segs),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)ioflags),
		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}

/*
 * Log a page-cache invalidation event (tag XFS_INVAL_CACHED) into the
 * per-inode ktrace ring buffer; same 64->2x32-bit splitting as above.
 */
void
xfs_inval_cached_trace(
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	xfs_off_t	len,
	xfs_off_t	first,
	xfs_off_t	last)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(__psint_t)XFS_INVAL_CACHED,
		(void *)ip,
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)((len >> 32) & 0xffffffff)),
		(void *)((unsigned long)(len & 0xffffffff)),
		(void *)((unsigned long)((first >> 32) & 0xffffffff)),
		(void *)((unsigned long)(first & 0xffffffff)),
		(void *)((unsigned long)((last >> 32) & 0xffffffff)),
		(void *)((unsigned long)(last & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}
#endif
144
/*
 * xfs_iozero
 *
 * xfs_iozero clears the specified range of buffer supplied,
 * and marks all the affected blocks as valid and modified.  If
 * an affected block is not allocated, it will be allocated.  If
 * an affected block is not completely overwritten, and is not
 * valid before the operation, it will be read from disk before
 * being partially zeroed.
 *
 * Walks the range one page-cache page at a time through the
 * address_space prepare_write/commit_write hooks.  Also advances
 * i_size (capped at end_size) as pages are committed.
 *
 * Returns 0 on success, a *positive* errno on failure (the internal
 * status codes are negative; the final return negates them).
 */
STATIC int
xfs_iozero(
	struct inode		*ip,	/* inode			*/
	loff_t			pos,	/* offset in file		*/
	size_t			count,	/* size of data to zero		*/
	loff_t			end_size)	/* max file size to set	*/
{
	unsigned		bytes;
	struct page		*page;
	struct address_space	*mapping;
	char			*kaddr;
	int			status;

	mapping = ip->i_mapping;
	do {
		unsigned long index, offset;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		/* clamp to the end of this page, then to the remaining count */
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = -ENOMEM;
		page = grab_cache_page(mapping, index);	/* returns page locked */
		if (!page)
			break;

		kaddr = kmap(page);
		/* let the fs read in / allocate the affected blocks first */
		status = mapping->a_ops->prepare_write(NULL, page, offset,
							offset + bytes);
		if (status) {
			goto unlock;
		}

		memset((void *) (kaddr + offset), 0, bytes);
		flush_dcache_page(page);
		status = mapping->a_ops->commit_write(NULL, page, offset,
							offset + bytes);
		if (!status) {
			pos += bytes;
			count -= bytes;
			/* grow i_size to cover the zeroed range, but never
			 * past the caller's end_size limit */
			if (pos > i_size_read(ip))
				i_size_write(ip, pos < end_size ? pos : end_size);
		}

unlock:
		kunmap(page);
		unlock_page(page);
		page_cache_release(page);
		if (status)
			break;
	} while (count);

	return (-status);	/* negate negative errno -> positive error */
}
211
/*
 * xfs_inval_cached_pages
 *
 * Keep direct I/O and buffered I/O somewhat coherent by flushing and
 * invalidating any cached pages from the page-aligned start of the
 * affected range to EOF.  If there are no cached pages (VN_CACHED()
 * is zero) this is a cheap no-op.
 *
 * NOTE(review): the FI_REMAPF_LOCKED flag implies the caller already
 * holds the relevant lock -- this function itself takes no locks;
 * confirm against callers.  The 'write' and 'relock' parameters are
 * currently unused.
 */
void
xfs_inval_cached_pages(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	int		write,		/* unused */
	int		relock)		/* unused */
{
	if (VN_CACHED(vp)) {
		/* ctooff(offtoct(offset)) rounds offset down to a page
		 * ("click") boundary; -1 length means "through EOF" */
		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
	}

}
235
236ssize_t /* bytes read, or (-) error */
237xfs_read(
238 bhv_desc_t *bdp,
239 struct kiocb *iocb,
240 const struct iovec *iovp,
241 unsigned int segs,
242 loff_t *offset,
243 int ioflags,
244 cred_t *credp)
245{
246 struct file *file = iocb->ki_filp;
247 struct inode *inode = file->f_mapping->host;
248 size_t size = 0;
249 ssize_t ret;
250 xfs_fsize_t n;
251 xfs_inode_t *ip;
252 xfs_mount_t *mp;
253 vnode_t *vp;
254 unsigned long seg;
255
256 ip = XFS_BHVTOI(bdp);
257 vp = BHV_TO_VNODE(bdp);
258 mp = ip->i_mount;
259
260 XFS_STATS_INC(xs_read_calls);
261
262 /* START copy & waste from filemap.c */
263 for (seg = 0; seg < segs; seg++) {
264 const struct iovec *iv = &iovp[seg];
265
266 /*
267 * If any segment has a negative length, or the cumulative
268 * length ever wraps negative then return -EINVAL.
269 */
270 size += iv->iov_len;
271 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
272 return XFS_ERROR(-EINVAL);
273 }
274 /* END copy & waste from filemap.c */
275
276 if (unlikely(ioflags & IO_ISDIRECT)) {
277 xfs_buftarg_t *target =
278 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
279 mp->m_rtdev_targp : mp->m_ddev_targp;
280 if ((*offset & target->pbr_smask) ||
281 (size & target->pbr_smask)) {
282 if (*offset == ip->i_d.di_size) {
283 return (0);
284 }
285 return -XFS_ERROR(EINVAL);
286 }
287 }
288
289 n = XFS_MAXIOFFSET(mp) - *offset;
290 if ((n <= 0) || (size == 0))
291 return 0;
292
293 if (n < size)
294 size = n;
295
296 if (XFS_FORCED_SHUTDOWN(mp)) {
297 return -EIO;
298 }
299
300 if (unlikely(ioflags & IO_ISDIRECT))
301 down(&inode->i_sem);
302 xfs_ilock(ip, XFS_IOLOCK_SHARED);
303
304 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
305 !(ioflags & IO_INVIS)) {
306 vrwlock_t locktype = VRWLOCK_READ;
307
308 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
309 BHV_TO_VNODE(bdp), *offset, size,
310 FILP_DELAY_FLAG(file), &locktype);
311 if (ret) {
312 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
313 goto unlock_isem;
314 }
315 }
316
317 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
318 (void *)iovp, segs, *offset, ioflags);
319 ret = __generic_file_aio_read(iocb, iovp, segs, offset);
320 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
321 ret = wait_on_sync_kiocb(iocb);
322 if (ret > 0)
323 XFS_STATS_ADD(xs_read_bytes, ret);
324
325 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
326
327 if (likely(!(ioflags & IO_INVIS)))
328 xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
329
330unlock_isem:
331 if (unlikely(ioflags & IO_ISDIRECT))
332 up(&inode->i_sem);
333 return ret;
334}
335
/*
 * xfs_sendfile
 *
 * Behaviour-layer sendfile(2) entry point.  Clamps the count to the
 * maximum file offset, sends the DMAPI read event if enabled, then
 * delegates to generic_file_sendfile() under the shared iolock.
 *
 * Returns bytes sent, or a negative errno.
 */
ssize_t
xfs_sendfile(
	bhv_desc_t		*bdp,
	struct file		*filp,
	loff_t			*offset,
	int			ioflags,
	size_t			count,
	read_actor_t		actor,
	void			*target,
	cred_t			*credp)
{
	ssize_t			ret;
	xfs_fsize_t		n;
	xfs_inode_t		*ip;
	xfs_mount_t		*mp;
	vnode_t			*vp;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	/* clamp the request to the maximum supported file offset */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (count == 0))
		return 0;

	if (n < count)
		count = n;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_READ;
		int error;

		/* DMAPI managed-region callout; may block or fail the read */
		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
				      FILP_DELAY_FLAG(filp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;	/* positive XFS errno -> negative */
		}
	}
	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
		   (void *)(unsigned long)target, count, *offset, ioflags);
	ret = generic_file_sendfile(filp, offset, count, actor, target);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	/* update atime unless this is an invisible (DMAPI) read */
	if (likely(!(ioflags & IO_INVIS)))
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);

	return ret;
}
397
398/*
399 * This routine is called to handle zeroing any space in the last
400 * block of the file that is beyond the EOF. We do this since the
401 * size is being increased without writing anything to that block
402 * and we don't want anyone to read the garbage on the disk.
403 */
404STATIC int /* error (positive) */
405xfs_zero_last_block(
406 struct inode *ip,
407 xfs_iocore_t *io,
408 xfs_off_t offset,
409 xfs_fsize_t isize,
410 xfs_fsize_t end_size)
411{
412 xfs_fileoff_t last_fsb;
413 xfs_mount_t *mp;
414 int nimaps;
415 int zero_offset;
416 int zero_len;
417 int isize_fsb_offset;
418 int error = 0;
419 xfs_bmbt_irec_t imap;
420 loff_t loff;
421 size_t lsize;
422
423 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
424 ASSERT(offset > isize);
425
426 mp = io->io_mount;
427
428 isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
429 if (isize_fsb_offset == 0) {
430 /*
431 * There are no extra bytes in the last block on disk to
432 * zero, so return.
433 */
434 return 0;
435 }
436
437 last_fsb = XFS_B_TO_FSBT(mp, isize);
438 nimaps = 1;
439 error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
440 &nimaps, NULL);
441 if (error) {
442 return error;
443 }
444 ASSERT(nimaps > 0);
445 /*
446 * If the block underlying isize is just a hole, then there
447 * is nothing to zero.
448 */
449 if (imap.br_startblock == HOLESTARTBLOCK) {
450 return 0;
451 }
452 /*
453 * Zero the part of the last block beyond the EOF, and write it
454 * out sync. We need to drop the ilock while we do this so we
455 * don't deadlock when the buffer cache calls back to us.
456 */
457 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
458 loff = XFS_FSB_TO_B(mp, last_fsb);
459 lsize = XFS_FSB_TO_B(mp, 1);
460
461 zero_offset = isize_fsb_offset;
462 zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
463
464 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
465
466 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
467 ASSERT(error >= 0);
468 return error;
469}
470
471/*
472 * Zero any on disk space between the current EOF and the new,
473 * larger EOF. This handles the normal case of zeroing the remainder
474 * of the last block in the file and the unusual case of zeroing blocks
475 * out beyond the size of the file. This second case only happens
476 * with fixed size extents and when the system crashes before the inode
477 * size was updated but after blocks were allocated. If fill is set,
478 * then any holes in the range are filled and zeroed. If not, the holes
479 * are left alone as holes.
480 */
481
482int /* error (positive) */
483xfs_zero_eof(
484 vnode_t *vp,
485 xfs_iocore_t *io,
486 xfs_off_t offset, /* starting I/O offset */
487 xfs_fsize_t isize, /* current inode size */
488 xfs_fsize_t end_size) /* terminal inode size */
489{
490 struct inode *ip = LINVFS_GET_IP(vp);
491 xfs_fileoff_t start_zero_fsb;
492 xfs_fileoff_t end_zero_fsb;
493 xfs_fileoff_t prev_zero_fsb;
494 xfs_fileoff_t zero_count_fsb;
495 xfs_fileoff_t last_fsb;
496 xfs_extlen_t buf_len_fsb;
497 xfs_extlen_t prev_zero_count;
498 xfs_mount_t *mp;
499 int nimaps;
500 int error = 0;
501 xfs_bmbt_irec_t imap;
502 loff_t loff;
503 size_t lsize;
504
505 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
506 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
507
508 mp = io->io_mount;
509
510 /*
511 * First handle zeroing the block on which isize resides.
512 * We only zero a part of that block so it is handled specially.
513 */
514 error = xfs_zero_last_block(ip, io, offset, isize, end_size);
515 if (error) {
516 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
517 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
518 return error;
519 }
520
521 /*
522 * Calculate the range between the new size and the old
523 * where blocks needing to be zeroed may exist. To get the
524 * block where the last byte in the file currently resides,
525 * we need to subtract one from the size and truncate back
526 * to a block boundary. We subtract 1 in case the size is
527 * exactly on a block boundary.
528 */
529 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
530 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
531 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
532 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
533 if (last_fsb == end_zero_fsb) {
534 /*
535 * The size was only incremented on its last block.
536 * We took care of that above, so just return.
537 */
538 return 0;
539 }
540
541 ASSERT(start_zero_fsb <= end_zero_fsb);
542 prev_zero_fsb = NULLFILEOFF;
543 prev_zero_count = 0;
544 while (start_zero_fsb <= end_zero_fsb) {
545 nimaps = 1;
546 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
547 error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
548 0, NULL, 0, &imap, &nimaps, NULL);
549 if (error) {
550 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
551 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
552 return error;
553 }
554 ASSERT(nimaps > 0);
555
556 if (imap.br_state == XFS_EXT_UNWRITTEN ||
557 imap.br_startblock == HOLESTARTBLOCK) {
558 /*
559 * This loop handles initializing pages that were
560 * partially initialized by the code below this
561 * loop. It basically zeroes the part of the page
562 * that sits on a hole and sets the page as P_HOLE
563 * and calls remapf if it is a mapped file.
564 */
565 prev_zero_fsb = NULLFILEOFF;
566 prev_zero_count = 0;
567 start_zero_fsb = imap.br_startoff +
568 imap.br_blockcount;
569 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
570 continue;
571 }
572
573 /*
574 * There are blocks in the range requested.
575 * Zero them a single write at a time. We actually
576 * don't zero the entire range returned if it is
577 * too big and simply loop around to get the rest.
578 * That is not the most efficient thing to do, but it
579 * is simple and this path should not be exercised often.
580 */
581 buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
582 mp->m_writeio_blocks << 8);
583 /*
584 * Drop the inode lock while we're doing the I/O.
585 * We'll still have the iolock to protect us.
586 */
587 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
588
589 loff = XFS_FSB_TO_B(mp, start_zero_fsb);
590 lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
591
592 error = xfs_iozero(ip, loff, lsize, end_size);
593
594 if (error) {
595 goto out_lock;
596 }
597
598 prev_zero_fsb = start_zero_fsb;
599 prev_zero_count = buf_len_fsb;
600 start_zero_fsb = imap.br_startoff + buf_len_fsb;
601 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
602
603 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
604 }
605
606 return 0;
607
608out_lock:
609
610 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
611 ASSERT(error >= 0);
612 return error;
613}
614
/*
 * xfs_write - behavior-layer entry point for write(2)/aio writes on XFS
 * regular files, covering both buffered and O_DIRECT paths.
 *
 * Validates the iovec, takes the iolock (exclusive via i_sem for buffered
 * writes, shared for direct I/O), dispatches DMAPI events when enabled,
 * zeroes any gap between the old EOF and the write offset, strips
 * setuid/setgid, performs the write via the generic file paths, and
 * finally updates the on-disk size and handles O_SYNC semantics.
 *
 * Returns the number of bytes written, or a negative errno.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t		*bdp,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags,
	cred_t			*credp)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	unsigned long		segs = nsegs;
	xfs_inode_t		*xip;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	xfs_iocore_t		*io;
	vnode_t			*vp;
	unsigned long		seg;
	int			iolock;
	int			eventsent = 0;
	vrwlock_t		locktype;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_isem = 1, need_flush = 0;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	/*
	 * Validate the user iovec and accumulate the total byte count.
	 * A bad segment after the first truncates the request rather
	 * than failing it outright.
	 */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Block here if the filesystem is frozen for a snapshot. */
	fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		/* Direct I/O must be sector aligned in offset and length. */
		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
			return XFS_ERROR(-EINVAL);

		/*
		 * Direct writes can use the shared iolock unless there are
		 * cached pages to flush first or we're extending the file.
		 */
		if (!VN_CACHED(vp) && pos < i_size_read(inode))
			need_isem = 0;

		if (VN_CACHED(vp))
			need_flush = 1;
	}

relock:
	if (need_isem) {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;

		down(&inode->i_sem);
	} else {
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = i_size_read(inode);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_isem;
	}

	new_size = pos + count;
	if (new_size > isize)
		io->io_new_size = new_size;

	/* Send the DMAPI write event, at most once per call. */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = pos;
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_isem)
			dmflags |= DM_FLAGS_ISEM;

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      pos, count,
				      dmflags, &locktype);
		if (error) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reaquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && savedsize != isize) {
			pos = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * On Linux, generic_file_write updates the times even if
	 * no data is copied in so long as the write had a size.
	 *
	 * We must update xfs' times since revalidate will overcopy xfs.
	 */
	if (!(ioflags & IO_INVIS)) {
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
		inode_update_time(inode, 1);
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */

	if (pos > isize) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
			isize, pos + count);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			goto out_unlock_isem;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */

	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_dentry);
		if (unlikely(error)) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
	}

retry:
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (need_flush) {
			xfs_inval_cached_trace(io, pos, -1,
					ctooff(offtoct(pos)), -1);
			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
					-1, FI_REMAPF_LOCKED);
		}

		if (need_isem) {
			/* demote the lock now the cached pages are gone */
			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
			up(&inode->i_sem);

			iolock = XFS_IOLOCK_SHARED;
			locktype = VRWLOCK_WRITE_DIRECT;
			need_isem = 0;
		}

 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			need_isem = 1;
			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	/* For non-AIO callers, wait for a queued async kiocb to finish. */
	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
		ret = wait_on_sync_kiocb(iocb);

	/*
	 * ENOSPC with DMAPI enabled: give the DMAPI application a chance
	 * to free up space, then retry the whole write once.
	 */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (error)
			goto out_unlock_isem;
		xfs_rwlock(bdp, locktype);
		pos = xip->i_d.di_size;
		ret = 0;
		goto retry;
	}

	/*
	 * If the write extended the file, push the new size into the
	 * on-disk inode; re-check under the ilock to avoid racing with
	 * a concurrent extender.
	 */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			xfs_inode_log_item_t	*iip = xip->i_itemp;

			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			if (iip && iip->ili_last_lsn) {
				xfs_log_force(mp, iip->ili_last_lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}

		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */

			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
			if (error)
				goto out_unlock_internal;
		}

		xfs_rwunlock(bdp, locktype);
		if (need_isem)
			up(&inode->i_sem);

		/* Flush and wait on the dirtied range for O_SYNC. */
		error = sync_page_range(inode, mapping, pos, ret);
		if (!error)
			error = ret;
		return error;
	}

 out_unlock_internal:
	xfs_rwunlock(bdp, locktype);
 out_unlock_isem:
	if (need_isem)
		up(&inode->i_sem);
	return -error;
}
988
989/*
990 * All xfs metadata buffers except log state machine buffers
991 * get this attached as their b_bdstrat callback function.
992 * This is so that we can catch a buffer
993 * after prematurely unpinning it to forcibly shutdown the filesystem.
994 */
995int
996xfs_bdstrat_cb(struct xfs_buf *bp)
997{
998 xfs_mount_t *mp;
999
1000 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
1001 if (!XFS_FORCED_SHUTDOWN(mp)) {
1002 pagebuf_iorequest(bp);
1003 return 0;
1004 } else {
1005 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
1006 /*
1007 * Metadata write that didn't get logged but
1008 * written delayed anyway. These aren't associated
1009 * with a transaction, and can be ignored.
1010 */
1011 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
1012 (XFS_BUF_ISREAD(bp)) == 0)
1013 return (xfs_bioerror_relse(bp));
1014 else
1015 return (xfs_bioerror(bp));
1016 }
1017}
1018
1019
1020int
1021xfs_bmap(bhv_desc_t *bdp,
1022 xfs_off_t offset,
1023 ssize_t count,
1024 int flags,
1025 xfs_iomap_t *iomapp,
1026 int *niomaps)
1027{
1028 xfs_inode_t *ip = XFS_BHVTOI(bdp);
1029 xfs_iocore_t *io = &ip->i_iocore;
1030
1031 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
1032 ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
1033 ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
1034
1035 return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
1036}
1037
/*
 * Wrapper around bdstrat so that we can stop data
 * from going to disk in case we are shutting down the filesystem.
 * Typically user data goes thru this path; one of the exceptions
 * is the superblock.
 */
int
xfsbdstrat(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	ASSERT(mp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		/* Shutdown in progress: error the buffer out instead. */
		xfs_buftrace("XFSBDSTRAT IOERROR", bp);
		return (xfs_bioerror_relse(bp));
	}

	/* Grio redirection would go here
	 * if (XFS_BUF_IS_GRIO(bp)) {
	 */
	pagebuf_iorequest(bp);
	return 0;
}
1062
1063/*
1064 * If the underlying (data/log/rt) device is readonly, there are some
1065 * operations that cannot proceed.
1066 */
1067int
1068xfs_dev_is_read_only(
1069 xfs_mount_t *mp,
1070 char *message)
1071{
1072 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
1073 xfs_readonly_buftarg(mp->m_logdev_targp) ||
1074 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
1075 cmn_err(CE_NOTE,
1076 "XFS: %s required on read-only device.", message);
1077 cmn_err(CE_NOTE,
1078 "XFS: write access unavailable, cannot proceed.");
1079 return EROFS;
1080 }
1081 return 0;
1082}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
new file mode 100644
index 000000000000..d723e35254a0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -0,0 +1,116 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_LRW_H__
33#define __XFS_LRW_H__
34
35struct vnode;
36struct bhv_desc;
37struct xfs_mount;
38struct xfs_iocore;
39struct xfs_inode;
40struct xfs_bmbt_irec;
41struct xfs_buf;
42struct xfs_iomap;
43
44#if defined(XFS_RW_TRACE)
45/*
46 * Defines for the trace mechanisms in xfs_lrw.c.
47 */
48#define XFS_RW_KTRACE_SIZE 128
49
50#define XFS_READ_ENTER 1
51#define XFS_WRITE_ENTER 2
52#define XFS_IOMAP_READ_ENTER 3
53#define XFS_IOMAP_WRITE_ENTER 4
54#define XFS_IOMAP_READ_MAP 5
55#define XFS_IOMAP_WRITE_MAP 6
56#define XFS_IOMAP_WRITE_NOSPACE 7
57#define XFS_ITRUNC_START 8
58#define XFS_ITRUNC_FINISH1 9
59#define XFS_ITRUNC_FINISH2 10
60#define XFS_CTRUNC1 11
61#define XFS_CTRUNC2 12
62#define XFS_CTRUNC3 13
63#define XFS_CTRUNC4 14
64#define XFS_CTRUNC5 15
65#define XFS_CTRUNC6 16
66#define XFS_BUNMAPI 17
67#define XFS_INVAL_CACHED 18
68#define XFS_DIORD_ENTER 19
69#define XFS_DIOWR_ENTER 20
70#define XFS_SENDFILE_ENTER 21
71#define XFS_WRITEPAGE_ENTER 22
72#define XFS_RELEASEPAGE_ENTER 23
73#define XFS_IOMAP_ALLOC_ENTER 24
74#define XFS_IOMAP_ALLOC_MAP 25
75#define XFS_IOMAP_UNWRITTEN 26
76extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
77 void *, size_t, loff_t, int);
78extern void xfs_inval_cached_trace(struct xfs_iocore *,
79 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
80#else
81#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags)
82#define xfs_inval_cached_trace(io, offset, len, first, last)
83#endif
84
85/*
86 * Maximum count of bmaps used by read and write paths.
87 */
88#define XFS_MAX_RW_NBMAPS 4
89
90extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
91 struct xfs_iomap *, int *);
92extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
93extern int xfs_bdstrat_cb(struct xfs_buf *);
94
95extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
96 xfs_fsize_t, xfs_fsize_t);
97extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *,
98 xfs_off_t, int, int);
99extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
100 const struct iovec *, unsigned int,
101 loff_t *, int, struct cred *);
102extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
103 const struct iovec *, unsigned int,
104 loff_t *, int, struct cred *);
105extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
106 loff_t *, int, size_t, read_actor_t,
107 void *, struct cred *);
108
109extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
110
/*
 * Convert a filesystem block number to an on-disk address for I/O:
 * realtime files map straight to basic blocks, data-device files go
 * through the AG-aware FSB-to-daddr conversion.
 */
#define XFS_FSB_TO_DB_IO(io,fsb) \
		(((io)->io_flags & XFS_IOCORE_RT) ? \
		 XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
115
116#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
new file mode 100644
index 000000000000..aaf5ddba47f3
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -0,0 +1,132 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include <linux/proc_fs.h>
35
36DEFINE_PER_CPU(struct xfsstats, xfsstats);
37
38STATIC int
39xfs_read_xfsstats(
40 char *buffer,
41 char **start,
42 off_t offset,
43 int count,
44 int *eof,
45 void *data)
46{
47 int c, i, j, len, val;
48 __uint64_t xs_xstrat_bytes = 0;
49 __uint64_t xs_write_bytes = 0;
50 __uint64_t xs_read_bytes = 0;
51
52 static struct xstats_entry {
53 char *desc;
54 int endpoint;
55 } xstats[] = {
56 { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC },
57 { "abt", XFSSTAT_END_ALLOC_BTREE },
58 { "blk_map", XFSSTAT_END_BLOCK_MAPPING },
59 { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE },
60 { "dir", XFSSTAT_END_DIRECTORY_OPS },
61 { "trans", XFSSTAT_END_TRANSACTIONS },
62 { "ig", XFSSTAT_END_INODE_OPS },
63 { "log", XFSSTAT_END_LOG_OPS },
64 { "push_ail", XFSSTAT_END_TAIL_PUSHING },
65 { "xstrat", XFSSTAT_END_WRITE_CONVERT },
66 { "rw", XFSSTAT_END_READ_WRITE_OPS },
67 { "attr", XFSSTAT_END_ATTRIBUTE_OPS },
68 { "icluster", XFSSTAT_END_INODE_CLUSTER },
69 { "vnodes", XFSSTAT_END_VNODE_OPS },
70 { "buf", XFSSTAT_END_BUF },
71 };
72
73 /* Loop over all stats groups */
74 for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
75 len += sprintf(buffer + len, xstats[i].desc);
76 /* inner loop does each group */
77 while (j < xstats[i].endpoint) {
78 val = 0;
79 /* sum over all cpus */
80 for (c = 0; c < NR_CPUS; c++) {
81 if (!cpu_possible(c)) continue;
82 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
83 }
84 len += sprintf(buffer + len, " %u", val);
85 j++;
86 }
87 buffer[len++] = '\n';
88 }
89 /* extra precision counters */
90 for (i = 0; i < NR_CPUS; i++) {
91 if (!cpu_possible(i)) continue;
92 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
93 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
94 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
95 }
96
97 len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
98 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
99 len += sprintf(buffer + len, "debug %u\n",
100#if defined(DEBUG)
101 1);
102#else
103 0);
104#endif
105
106 if (offset >= len) {
107 *start = buffer;
108 *eof = 1;
109 return 0;
110 }
111 *start = buffer + offset;
112 if ((len -= offset) > count)
113 return count;
114 *eof = 1;
115
116 return len;
117}
118
119void
120xfs_init_procfs(void)
121{
122 if (!proc_mkdir("fs/xfs", NULL))
123 return;
124 create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
125}
126
/*
 * Tear down the procfs entries created by xfs_init_procfs().
 * The "stat" file must be removed before its parent directory.
 */
void
xfs_cleanup_procfs(void)
{
	remove_proc_entry("fs/xfs/stat", NULL);
	remove_proc_entry("fs/xfs", NULL);
}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
new file mode 100644
index 000000000000..3f756a6c3eb0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -0,0 +1,166 @@
1/*
2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_STATS_H__
33#define __XFS_STATS_H__
34
35
36#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
37
38#include <linux/percpu.h>
39
40/*
41 * XFS global statistics
42 */
/*
 * Per-cpu XFS statistics, summed and exported via /proc/fs/xfs/stat.
 *
 * The __u32 counters are laid out in named groups; each
 * XFSSTAT_END_* value is the cumulative count of __u32 fields up to
 * and including that group, so the proc reader can walk the struct as
 * a flat __u32 array.  The 64-bit byte counters at the end are
 * reported separately (the "xpc" line).
 */
struct xfsstats {
/* extent allocation */
# define XFSSTAT_END_EXTENT_ALLOC	4
	__uint32_t		xs_allocx;
	__uint32_t		xs_allocb;
	__uint32_t		xs_freex;
	__uint32_t		xs_freeb;
/* allocation btree */
# define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
	__uint32_t		xs_abt_lookup;
	__uint32_t		xs_abt_compare;
	__uint32_t		xs_abt_insrec;
	__uint32_t		xs_abt_delrec;
/* block mapping */
# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
	__uint32_t		xs_blk_mapr;
	__uint32_t		xs_blk_mapw;
	__uint32_t		xs_blk_unmap;
	__uint32_t		xs_add_exlist;
	__uint32_t		xs_del_exlist;
	__uint32_t		xs_look_exlist;
	__uint32_t		xs_cmp_exlist;
/* block map btree */
# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
	__uint32_t		xs_bmbt_lookup;
	__uint32_t		xs_bmbt_compare;
	__uint32_t		xs_bmbt_insrec;
	__uint32_t		xs_bmbt_delrec;
/* directory operations */
# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
	__uint32_t		xs_dir_lookup;
	__uint32_t		xs_dir_create;
	__uint32_t		xs_dir_remove;
	__uint32_t		xs_dir_getdents;
/* transactions */
# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
	__uint32_t		xs_trans_sync;
	__uint32_t		xs_trans_async;
	__uint32_t		xs_trans_empty;
/* inode cache (inode get) operations */
# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
	__uint32_t		xs_ig_attempts;
	__uint32_t		xs_ig_found;
	__uint32_t		xs_ig_frecycle;
	__uint32_t		xs_ig_missed;
	__uint32_t		xs_ig_dup;
	__uint32_t		xs_ig_reclaims;
	__uint32_t		xs_ig_attrchg;
/* log operations */
# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
	__uint32_t		xs_log_writes;
	__uint32_t		xs_log_blocks;
	__uint32_t		xs_log_noiclogs;
	__uint32_t		xs_log_force;
	__uint32_t		xs_log_force_sleep;
/* AIL (active item list) tail pushing */
# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
	__uint32_t		xs_try_logspace;
	__uint32_t		xs_sleep_logspace;
	__uint32_t		xs_push_ail;
	__uint32_t		xs_push_ail_success;
	__uint32_t		xs_push_ail_pushbuf;
	__uint32_t		xs_push_ail_pinned;
	__uint32_t		xs_push_ail_locked;
	__uint32_t		xs_push_ail_flushing;
	__uint32_t		xs_push_ail_restarts;
	__uint32_t		xs_push_ail_flush;
/* extent-to-real conversions on write */
# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
	__uint32_t		xs_xstrat_quick;
	__uint32_t		xs_xstrat_split;
/* read/write syscall counts */
# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
	__uint32_t		xs_write_calls;
	__uint32_t		xs_read_calls;
/* extended attribute operations */
# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
	__uint32_t		xs_attr_get;
	__uint32_t		xs_attr_set;
	__uint32_t		xs_attr_remove;
	__uint32_t		xs_attr_list;
/* inode cluster flushing */
# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
	__uint32_t		xs_iflush_count;
	__uint32_t		xs_icluster_flushcnt;
	__uint32_t		xs_icluster_flushinode;
/* vnode lifecycle */
# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
	__uint32_t		vn_active;	/* # vnodes not on free lists */
	__uint32_t		vn_alloc;	/* # times vn_alloc called */
	__uint32_t		vn_get;		/* # times vn_get called */
	__uint32_t		vn_hold;	/* # times vn_hold called */
	__uint32_t		vn_rele;	/* # times vn_rele called */
	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
	__uint32_t		vn_remove;	/* # times vn_remove called */
	__uint32_t		vn_free;	/* # times vn_free called */
/* pagebuf (buffer cache) operations */
#define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
	__uint32_t		pb_get;
	__uint32_t		pb_create;
	__uint32_t		pb_get_locked;
	__uint32_t		pb_get_locked_waited;
	__uint32_t		pb_busy_locked;
	__uint32_t		pb_miss_locked;
	__uint32_t		pb_page_retries;
	__uint32_t		pb_page_found;
	__uint32_t		pb_get_read;
/* Extra precision counters */
	__uint64_t		xs_xstrat_bytes;
	__uint64_t		xs_write_bytes;
	__uint64_t		xs_read_bytes;
};
140
141DECLARE_PER_CPU(struct xfsstats, xfsstats);
142
143/*
144 * We don't disable preempt, not too worried about poking the
145 * wrong CPU's stat for now (also aggregated before reporting).
146 */
147#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++)
148#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
149#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
150
151extern void xfs_init_procfs(void);
152extern void xfs_cleanup_procfs(void);
153
154
155#else /* !CONFIG_PROC_FS */
156
157# define XFS_STATS_INC(count)
158# define XFS_STATS_DEC(count)
159# define XFS_STATS_ADD(count, inc)
160
161static __inline void xfs_init_procfs(void) { };
162static __inline void xfs_cleanup_procfs(void) { };
163
164#endif /* !CONFIG_PROC_FS */
165
166#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
new file mode 100644
index 000000000000..53dc658cafa6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -0,0 +1,912 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35#include "xfs_inum.h"
36#include "xfs_log.h"
37#include "xfs_clnt.h"
38#include "xfs_trans.h"
39#include "xfs_sb.h"
40#include "xfs_dir.h"
41#include "xfs_dir2.h"
42#include "xfs_alloc.h"
43#include "xfs_dmapi.h"
44#include "xfs_quota.h"
45#include "xfs_mount.h"
46#include "xfs_alloc_btree.h"
47#include "xfs_bmap_btree.h"
48#include "xfs_ialloc_btree.h"
49#include "xfs_btree.h"
50#include "xfs_ialloc.h"
51#include "xfs_attr_sf.h"
52#include "xfs_dir_sf.h"
53#include "xfs_dir2_sf.h"
54#include "xfs_dinode.h"
55#include "xfs_inode.h"
56#include "xfs_bmap.h"
57#include "xfs_bit.h"
58#include "xfs_rtalloc.h"
59#include "xfs_error.h"
60#include "xfs_itable.h"
61#include "xfs_rw.h"
62#include "xfs_acl.h"
63#include "xfs_cap.h"
64#include "xfs_mac.h"
65#include "xfs_attr.h"
66#include "xfs_buf_item.h"
67#include "xfs_utils.h"
68#include "xfs_version.h"
69#include "xfs_ioctl32.h"
70
71#include <linux/namei.h>
72#include <linux/init.h>
73#include <linux/mount.h>
74#include <linux/writeback.h>
75
76STATIC struct quotactl_ops linvfs_qops;
77STATIC struct super_operations linvfs_sops;
78STATIC kmem_zone_t *linvfs_inode_zone;
79
80STATIC struct xfs_mount_args *
81xfs_args_allocate(
82 struct super_block *sb)
83{
84 struct xfs_mount_args *args;
85
86 args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
87 args->logbufs = args->logbufsize = -1;
88 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
89
90 /* Copy the already-parsed mount(2) flags we're interested in */
91 if (sb->s_flags & MS_NOATIME)
92 args->flags |= XFSMNT_NOATIME;
93 if (sb->s_flags & MS_DIRSYNC)
94 args->flags |= XFSMNT_DIRSYNC;
95 if (sb->s_flags & MS_SYNCHRONOUS)
96 args->flags |= XFSMNT_WSYNC;
97
98 /* Default to 32 bit inodes on Linux all the time */
99 args->flags |= XFSMNT_32BITINODES;
100
101 return args;
102}
103
104__uint64_t
105xfs_max_file_offset(
106 unsigned int blockshift)
107{
108 unsigned int pagefactor = 1;
109 unsigned int bitshift = BITS_PER_LONG - 1;
110
111 /* Figure out maximum filesize, on Linux this can depend on
112 * the filesystem blocksize (on 32 bit platforms).
113 * __block_prepare_write does this in an [unsigned] long...
114 * page->index << (PAGE_CACHE_SHIFT - bbits)
115 * So, for page sized blocks (4K on 32 bit platforms),
116 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
117 * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
118 * but for smaller blocksizes it is less (bbits = log2 bsize).
119 * Note1: get_block_t takes a long (implicit cast from above)
120 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
121 * can optionally convert the [unsigned] long from above into
122 * an [unsigned] long long.
123 */
124
125#if BITS_PER_LONG == 32
126# if defined(CONFIG_LBD)
127 ASSERT(sizeof(sector_t) == 8);
128 pagefactor = PAGE_CACHE_SIZE;
129 bitshift = BITS_PER_LONG;
130# else
131 pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
132# endif
133#endif
134
135 return (((__uint64_t)pagefactor) << bitshift) - 1;
136}
137
138STATIC __inline__ void
139xfs_set_inodeops(
140 struct inode *inode)
141{
142 vnode_t *vp = LINVFS_GET_VP(inode);
143
144 if (vp->v_type == VNON) {
145 vn_mark_bad(vp);
146 } else if (S_ISREG(inode->i_mode)) {
147 inode->i_op = &linvfs_file_inode_operations;
148 inode->i_fop = &linvfs_file_operations;
149 inode->i_mapping->a_ops = &linvfs_aops;
150 } else if (S_ISDIR(inode->i_mode)) {
151 inode->i_op = &linvfs_dir_inode_operations;
152 inode->i_fop = &linvfs_dir_operations;
153 } else if (S_ISLNK(inode->i_mode)) {
154 inode->i_op = &linvfs_symlink_inode_operations;
155 if (inode->i_blocks)
156 inode->i_mapping->a_ops = &linvfs_aops;
157 } else {
158 inode->i_op = &linvfs_file_inode_operations;
159 init_special_inode(inode, inode->i_mode, inode->i_rdev);
160 }
161}
162
/*
 * Copy the authoritative on-disk inode state (xfs_inode) into the Linux
 * inode, and clear VMODIFIED to record that the two are in sync again.
 */
STATIC __inline__ void
xfs_revalidate_inode(
	xfs_mount_t		*mp,
	vnode_t			*vp,
	xfs_inode_t		*ip)
{
	struct inode		*inode = LINVFS_GET_IP(vp);

	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;
	/* only block and character specials carry a device number */
	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
		inode->i_rdev = 0;
	} else {
		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
		inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
	}
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	/* i_blocks is in 512-byte basic blocks; include delalloc blocks */
	inode->i_blocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	/* translate XFS per-inode flags to the generic S_* inode flags,
	 * clearing any that are no longer set on disk */
	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;
	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;
	vp->v_flag &= ~VMODIFIED;
}
210
/*
 * Attach an xfs_inode behavior to a vnode and, once the inode core is
 * valid, populate the Linux inode and publish it.
 *
 * bdp:       behavior descriptor of the vfs the inode belongs to
 * vp:        vnode being initialized
 * inode_bhv: behavior descriptor to link the xfs_inode in with
 * unlock:    nonzero when the caller wants the new inode unlocked
 */
void
xfs_initialize_vnode(
	bhv_desc_t		*bdp,
	vnode_t			*vp,
	bhv_desc_t		*inode_bhv,
	int			unlock)
{
	xfs_inode_t		*ip = XFS_BHVTOI(inode_bhv);
	struct inode		*inode = LINVFS_GET_IP(vp);

	/* first call for this inode: link it into the behavior chain */
	if (!inode_bhv->bd_vobj) {
		vp->v_vfsp = bhvtovfs(bdp);
		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
	}

	/*
	 * We need to set the ops vectors, and unlock the inode, but if
	 * we have been called during the new inode create process, it is
	 * too early to fill in the Linux inode.  We will get called a
	 * second time once the inode is properly set up, and then we can
	 * finish our work.
	 */
	if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
		vp->v_type = IFTOVT(ip->i_d.di_mode);
		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
		xfs_set_inodeops(inode);

		/* barrier(): make sure XFS_INEW is cleared before other
		 * CPUs can observe the unlocked inode */
		ip->i_flags &= ~XFS_INEW;
		barrier();

		unlock_new_inode(inode);
	}
}
245
246int
247xfs_blkdev_get(
248 xfs_mount_t *mp,
249 const char *name,
250 struct block_device **bdevp)
251{
252 int error = 0;
253
254 *bdevp = open_bdev_excl(name, 0, mp);
255 if (IS_ERR(*bdevp)) {
256 error = PTR_ERR(*bdevp);
257 printk("XFS: Invalid device [%s], error=%d\n", name, error);
258 }
259
260 return -error;
261}
262
/*
 * Release a block device taken with xfs_blkdev_get().
 * Tolerates a NULL @bdev so callers can unwind unconditionally.
 */
void
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (!bdev)
		return;
	close_bdev_excl(bdev);
}
270
271
272STATIC struct inode *
273linvfs_alloc_inode(
274 struct super_block *sb)
275{
276 vnode_t *vp;
277
278 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone,
279 kmem_flags_convert(KM_SLEEP));
280 if (!vp)
281 return NULL;
282 return LINVFS_GET_IP(vp);
283}
284
285STATIC void
286linvfs_destroy_inode(
287 struct inode *inode)
288{
289 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
290}
291
/*
 * Slab constructor for the vnode cache: initialize the embedded Linux
 * inode exactly once per slab object.  The SLAB_CTOR flag test follows
 * the 2.6 slab protocol: run only for a real construction, not for a
 * debug verify pass.
 */
STATIC void
init_once(
	void			*data,
	kmem_cache_t		*cachep,
	unsigned long		flags)
{
	vnode_t			*vp = (vnode_t *)data;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(LINVFS_GET_IP(vp));
}
304
305STATIC int
306init_inodecache( void )
307{
308 linvfs_inode_zone = kmem_cache_create("linvfs_icache",
309 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
310 init_once, NULL);
311 if (linvfs_inode_zone == NULL)
312 return -ENOMEM;
313 return 0;
314}
315
/*
 * Tear down the vnode/inode slab cache at module unload.  A nonzero
 * return from kmem_cache_destroy means objects are still live, which
 * would indicate an inode leak -- warn loudly.
 */
STATIC void
destroy_inodecache( void )
{
	if (kmem_cache_destroy(linvfs_inode_zone))
		printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
}
322
323/*
324 * Attempt to flush the inode, this will actually fail
325 * if the inode is pinned, but we dirty the inode again
326 * at the point when it is unpinned after a log write,
327 * since this is when the inode itself becomes flushable.
328 */
329STATIC int
330linvfs_write_inode(
331 struct inode *inode,
332 int sync)
333{
334 vnode_t *vp = LINVFS_GET_VP(inode);
335 int error = 0, flags = FLUSH_INODE;
336
337 if (vp) {
338 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
339 if (sync)
340 flags |= FLUSH_SYNC;
341 VOP_IFLUSH(vp, flags, error);
342 if (error == EAGAIN) {
343 if (sync)
344 VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
345 else
346 error = 0;
347 }
348 }
349
350 return -error;
351}
352
/*
 * super_operations.clear_inode: drop the vnode reference held for the
 * Linux inode and dismantle the vnode/behavior state.
 */
STATIC void
linvfs_clear_inode(
	struct inode		*inode)
{
	vnode_t			*vp = LINVFS_GET_VP(inode);

	if (vp) {
		vn_rele(vp);
		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
		/*
		 * Do all our cleanup, and remove this vnode.
		 */
		vn_remove(vp);
	}
}
368
369
370/*
371 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
372 * Doing this has two advantages:
373 * - It saves on stack space, which is tight in certain situations
374 * - It can be used (with care) as a mechanism to avoid deadlocks.
375 * Flushing while allocating in a full filesystem requires both.
376 */
377STATIC void
378xfs_syncd_queue_work(
379 struct vfs *vfs,
380 void *data,
381 void (*syncer)(vfs_t *, void *))
382{
383 vfs_sync_work_t *work;
384
385 work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
386 INIT_LIST_HEAD(&work->w_list);
387 work->w_syncer = syncer;
388 work->w_data = data;
389 work->w_vfs = vfs;
390 spin_lock(&vfs->vfs_sync_lock);
391 list_add_tail(&work->w_list, &vfs->vfs_sync_list);
392 spin_unlock(&vfs->vfs_sync_lock);
393 wake_up_process(vfs->vfs_sync_task);
394}
395
396/*
397 * Flush delayed allocate data, attempting to free up reserved space
398 * from existing allocations. At this point a new allocation attempt
399 * has failed with ENOSPC and we are in the process of scratching our
400 * heads, looking about for more room...
401 */
402STATIC void
403xfs_flush_inode_work(
404 vfs_t *vfs,
405 void *inode)
406{
407 filemap_flush(((struct inode *)inode)->i_mapping);
408 iput((struct inode *)inode);
409}
410
/*
 * Queue a flush of one inode's delalloc data on xfssyncd (see comment
 * on xfs_flush_inode_work), then pause briefly to give the flush a
 * chance to make progress before the caller retries its allocation.
 */
void
xfs_flush_inode(
	xfs_inode_t	*ip)
{
	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);

	/* hold a reference across the asynchronous work item */
	igrab(inode);
	xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
	delay(HZ/2);
}
422
423/*
424 * This is the "bigger hammer" version of xfs_flush_inode_work...
425 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
426 */
427STATIC void
428xfs_flush_device_work(
429 vfs_t *vfs,
430 void *inode)
431{
432 sync_blockdev(vfs->vfs_super->s_bdev);
433 iput((struct inode *)inode);
434}
435
/*
 * Queue a whole-device flush on xfssyncd, wait briefly, then force the
 * log synchronously -- the last-ditch attempt to free reserved space
 * before giving up with ENOSPC.
 */
void
xfs_flush_device(
	xfs_inode_t	*ip)
{
	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);

	/* hold a reference across the asynchronous work item */
	igrab(inode);
	xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
	delay(HZ/2);
	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
448
/* flags for the periodic background sync performed by xfssyncd */
#define SYNCD_FLAGS	(SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)

/*
 * Periodic sync body run from xfssyncd.  Skips the actual sync on a
 * read-only mount, but always bumps vfs_sync_seq and wakes any waiter
 * (linvfs_sync_super in laptop mode) so it can observe completion.
 */
STATIC void
vfs_sync_worker(
	vfs_t		*vfsp,
	void		*unused)
{
	int		error;

	if (!(vfsp->vfs_flag & VFS_RDONLY))
		VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
	vfsp->vfs_sync_seq++;
	/* wmb: publish the new sequence before waking the waiter */
	wmb();
	wake_up(&vfsp->vfs_wait_single_sync_task);
}
463
/*
 * Per-mount sync daemon.  Sleeps for xfs_syncd_centisecs between
 * iterations, draining vfs_sync_list each time it wakes; a timer
 * expiry (or an empty list, e.g. a laptop-mode wakeup) queues the
 * built-in periodic vfs_sync_work.  Exits when VFS_UMOUNT is set by
 * linvfs_stop_syncd(), signalling completion via vfs_wait_sync_task.
 */
STATIC int
xfssyncd(
	void			*arg)
{
	long			timeleft;
	vfs_t			*vfsp = (vfs_t *) arg;
	struct list_head	tmp;
	struct vfs_sync_work	*work, *n;

	daemonize("xfssyncd");

	vfsp->vfs_sync_work.w_vfs = vfsp;
	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
	vfsp->vfs_sync_task = current;
	/* wmb: vfs_sync_task must be visible before the starter wakes */
	wmb();
	wake_up(&vfsp->vfs_wait_sync_task);

	INIT_LIST_HEAD(&tmp);
	timeleft = (xfs_syncd_centisecs * HZ) / 100;
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		timeleft = schedule_timeout(timeleft);
		/* swsusp */
		try_to_freeze(PF_FREEZE);
		if (vfsp->vfs_flag & VFS_UMOUNT)
			break;

		spin_lock(&vfsp->vfs_sync_lock);
		/*
		 * We can get woken by laptop mode, to do a sync -
		 * that's the (only!) case where the list would be
		 * empty with time remaining.
		 */
		if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
			if (!timeleft)
				timeleft = (xfs_syncd_centisecs * HZ) / 100;
			INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
			list_add_tail(&vfsp->vfs_sync_work.w_list,
					&vfsp->vfs_sync_list);
		}
		/* move everything to a private list so the lock can drop
		 * before running the (possibly blocking) work items */
		list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
			list_move(&work->w_list, &tmp);
		spin_unlock(&vfsp->vfs_sync_lock);

		list_for_each_entry_safe(work, n, &tmp, w_list) {
			(*work->w_syncer)(vfsp, work->w_data);
			list_del(&work->w_list);
			/* the embedded periodic work item is not heap
			 * allocated -- never free it */
			if (work == &vfsp->vfs_sync_work)
				continue;
			kmem_free(work, sizeof(struct vfs_sync_work));
		}
	}

	vfsp->vfs_sync_task = NULL;
	/* wmb: publish NULL before waking linvfs_stop_syncd */
	wmb();
	wake_up(&vfsp->vfs_wait_sync_task);

	return 0;
}
523
/*
 * Spawn the xfssyncd kernel thread for a mount and wait until it has
 * announced itself via vfs_sync_task.  Returns 0 or a positive errno.
 */
STATIC int
linvfs_start_syncd(
	vfs_t			*vfsp)
{
	int			pid;

	pid = kernel_thread(xfssyncd, (void *) vfsp,
			CLONE_VM | CLONE_FS | CLONE_FILES);
	if (pid < 0)
		return -pid;
	wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
	return 0;
}
537
/*
 * Ask xfssyncd to exit and wait for it to clear vfs_sync_task.
 */
STATIC void
linvfs_stop_syncd(
	vfs_t			*vfsp)
{
	vfsp->vfs_flag |= VFS_UMOUNT;
	/* wmb: the flag must be visible before the thread is woken */
	wmb();

	wake_up_process(vfsp->vfs_sync_task);
	wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
}
548
/*
 * super_operations.put_super: stop the sync daemon, push remaining
 * dirty state, unmount the XFS vfs and free it.  If unmount fails the
 * vfs is deliberately leaked (with a warning) rather than freed while
 * possibly still referenced.
 */
STATIC void
linvfs_put_super(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	linvfs_stop_syncd(vfsp);
	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
	if (!error)
		VFS_UNMOUNT(vfsp, 0, NULL, error);
	if (error) {
		printk("XFS unmount got error %d\n", error);
		printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
		return;
	}

	vfs_deallocate(vfsp);
}
568
/*
 * super_operations.write_super: asynchronously push log and superblock
 * for a dirty filesystem; read-only mounts only clear the dirty bit.
 */
STATIC void
linvfs_write_super(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	if (sb->s_flags & MS_RDONLY) {
		sb->s_dirt = 0;	/* paranoia */
		return;
	}
	/* Push the log and superblock a little */
	VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
	sb->s_dirt = 0;
}
584
/*
 * super_operations.sync_fs: sync filesystem data, optionally waiting.
 * Returns a negated errno.
 */
STATIC int
linvfs_sync_super(
	struct super_block	*sb,
	int			wait)
{
	vfs_t		*vfsp = LINVFS_GET_VFS(sb);
	int		error;
	int		flags = SYNC_FSDATA;

	if (wait)
		flags |= SYNC_WAIT;

	VFS_SYNC(vfsp, flags, NULL, error);
	sb->s_dirt = 0;

	if (unlikely(laptop_mode)) {
		int	prev_sync_seq = vfsp->vfs_sync_seq;

		/*
		 * The disk must be active because we're syncing.
		 * We schedule xfssyncd now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		wake_up_process(vfsp->vfs_sync_task);
		/*
		 * We have to wait for the sync iteration to complete.
		 * If we don't, the disk activity caused by the sync
		 * will come after the sync is completed, and that
		 * triggers another sync from laptop mode.
		 *
		 * vfs_sync_worker bumps vfs_sync_seq when it finishes.
		 */
		wait_event(vfsp->vfs_wait_single_sync_task,
				vfsp->vfs_sync_seq != prev_sync_seq);
	}

	return -error;
}
621
/*
 * super_operations.statfs: delegate to the XFS vfs layer; returns a
 * negated errno for the VFS.
 */
STATIC int
linvfs_statfs(
	struct super_block	*sb,
	struct kstatfs		*statp)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_STATVFS(vfsp, statp, NULL, error);
	return -error;
}
633
/*
 * super_operations.remount_fs: reparse the mount options (with
 * update=1) and apply them to the live mount.  Returns a negated errno.
 */
STATIC int
linvfs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	struct xfs_mount_args	*args = xfs_args_allocate(sb);
	int			error;

	VFS_PARSEARGS(vfsp, options, args, 1, error);
	if (!error)
		VFS_MNTUPDATE(vfsp, flags, args, error);
	kmem_free(args, sizeof(*args));
	return -error;
}
650
/*
 * super_operations.write_super_lockfs: quiesce the filesystem for a
 * snapshot/suspend by freezing it at the XFS vfs layer.
 */
STATIC void
linvfs_freeze_fs(
	struct super_block	*sb)
{
	VFS_FREEZE(LINVFS_GET_VFS(sb));
}
657
/*
 * super_operations.show_options: emit mount options into /proc/mounts.
 *
 * NOTE(review): unlike its siblings this returns error unnegated --
 * presumably VFS_SHOWARGS already follows the Linux sign convention
 * here; confirm against the xfs_showargs implementation.
 */
STATIC int
linvfs_show_options(
	struct seq_file		*m,
	struct vfsmount		*mnt)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
	int			error;

	VFS_SHOWARGS(vfsp, m, error);
	return error;
}
669
/*
 * quotactl_ops.get_xstate: fetch overall quota state (Q_XGETQSTAT).
 * Returns a negated errno.
 */
STATIC int
linvfs_getxstate(
	struct super_block	*sb,
	struct fs_quota_stat	*fqs)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
	return -error;
}
681
/*
 * quotactl_ops.set_xstate: enable/disable quota enforcement; the
 * specific operation is selected by @op.  Returns a negated errno.
 */
STATIC int
linvfs_setxstate(
	struct super_block	*sb,
	unsigned int		flags,
	int			op)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
	return -error;
}
694
/*
 * quotactl_ops.get_xquota: fetch limits/usage for one user or group id,
 * selecting the group variant of the command when type == GRPQUOTA.
 * Returns a negated errno.
 */
STATIC int
linvfs_getxquota(
	struct super_block	*sb,
	int			type,
	qid_t			id,
	struct fs_disk_quota	*fdq)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error, getmode;

	getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
	return -error;
}
709
/*
 * quotactl_ops.set_xquota: set limits for one user or group id,
 * selecting the group variant of the command when type == GRPQUOTA.
 * Returns a negated errno.
 */
STATIC int
linvfs_setxquota(
	struct super_block	*sb,
	int			type,
	qid_t			id,
	struct fs_disk_quota	*fdq)
{
	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
	int			error, setmode;

	setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
	return -error;
}
724
/*
 * Fill a super_block for a new XFS mount: allocate/link the XFS vfs,
 * parse options, mount, populate sb fields from statvfs, obtain the
 * root inode and start the per-mount sync daemon.  Returns a negated
 * errno; on failure everything acquired so far is unwound in reverse
 * order via the fail_* labels.
 */
STATIC int
linvfs_fill_super(
	struct super_block	*sb,
	void			*data,
	int			silent)
{
	vnode_t			*rootvp;
	struct vfs		*vfsp = vfs_allocate();
	struct xfs_mount_args	*args = xfs_args_allocate(sb);
	struct kstatfs		statvfs;
	int			error, error2;

	vfsp->vfs_super = sb;
	LINVFS_SET_VFS(sb, vfsp);
	if (sb->s_flags & MS_RDONLY)
		vfsp->vfs_flag |= VFS_RDONLY;
	bhv_insert_all_vfsops(vfsp);

	VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
	if (error) {
		bhv_remove_all_vfsops(vfsp, 1);
		goto fail_vfsop;
	}

	sb_min_blocksize(sb, BBSIZE);
#ifdef CONFIG_XFS_EXPORT
	sb->s_export_op = &linvfs_export_ops;
#endif
	sb->s_qcop = &linvfs_qops;
	sb->s_op = &linvfs_sops;

	VFS_MOUNT(vfsp, args, NULL, error);
	if (error) {
		bhv_remove_all_vfsops(vfsp, 1);
		goto fail_vfsop;
	}

	/* use statvfs results to fill in the generic superblock fields */
	VFS_STATVFS(vfsp, &statvfs, NULL, error);
	if (error)
		goto fail_unmount;

	sb->s_dirt = 1;
	sb->s_magic = statvfs.f_type;
	sb->s_blocksize = statvfs.f_bsize;
	sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
	sb->s_time_gran = 1;
	set_posix_acl_flag(sb);

	VFS_ROOT(vfsp, &rootvp, error);
	if (error)
		goto fail_unmount;

	sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
	if (!sb->s_root) {
		error = ENOMEM;
		goto fail_vnrele;
	}
	if (is_bad_inode(sb->s_root->d_inode)) {
		error = EINVAL;
		goto fail_vnrele;
	}
	if ((error = linvfs_start_syncd(vfsp)))
		goto fail_vnrele;
	vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);

	kmem_free(args, sizeof(*args));
	return 0;

fail_vnrele:
	/* dput drops the root inode reference; otherwise release the
	 * vnode reference taken by VFS_ROOT directly */
	if (sb->s_root) {
		dput(sb->s_root);
		sb->s_root = NULL;
	} else {
		VN_RELE(rootvp);
	}

fail_unmount:
	/* error2: preserve the original error for the return value */
	VFS_UNMOUNT(vfsp, 0, NULL, error2);

fail_vfsop:
	vfs_deallocate(vfsp);
	kmem_free(args, sizeof(*args));
	return -error;
}
810
/*
 * file_system_type.get_sb: standard block-device mount entry point,
 * delegating superblock setup to linvfs_fill_super.
 */
STATIC struct super_block *
linvfs_get_sb(
	struct file_system_type	*fs_type,
	int			flags,
	const char		*dev_name,
	void			*data)
{
	return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
}
820
/* Superblock operations: the VFS entry points into XFS for this mount. */
STATIC struct super_operations linvfs_sops = {
	.alloc_inode		= linvfs_alloc_inode,
	.destroy_inode		= linvfs_destroy_inode,
	.write_inode		= linvfs_write_inode,
	.clear_inode		= linvfs_clear_inode,
	.put_super		= linvfs_put_super,
	.write_super		= linvfs_write_super,
	.sync_fs		= linvfs_sync_super,
	.write_super_lockfs	= linvfs_freeze_fs,
	.statfs			= linvfs_statfs,
	.remount_fs		= linvfs_remount,
	.show_options		= linvfs_show_options,
};
834
/* Quota-control operations exposed through the generic quotactl(2). */
STATIC struct quotactl_ops linvfs_qops = {
	.get_xstate		= linvfs_getxstate,
	.set_xstate		= linvfs_setxstate,
	.get_xquota		= linvfs_getxquota,
	.set_xquota		= linvfs_setxquota,
};
841
/* Registration record for the "xfs" filesystem type. */
STATIC struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.get_sb			= linvfs_get_sb,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
849
850
851STATIC int __init
852init_xfs_fs( void )
853{
854 int error;
855 struct sysinfo si;
856 static char message[] __initdata = KERN_INFO \
857 XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
858
859 printk(message);
860
861 si_meminfo(&si);
862 xfs_physmem = si.totalram;
863
864 ktrace_init(64);
865
866 error = init_inodecache();
867 if (error < 0)
868 goto undo_inodecache;
869
870 error = pagebuf_init();
871 if (error < 0)
872 goto undo_pagebuf;
873
874 vn_init();
875 xfs_init();
876 uuid_init();
877 vfs_initquota();
878
879 error = register_filesystem(&xfs_fs_type);
880 if (error)
881 goto undo_register;
882 XFS_DM_INIT(&xfs_fs_type);
883 return 0;
884
885undo_register:
886 pagebuf_terminate();
887
888undo_pagebuf:
889 destroy_inodecache();
890
891undo_inodecache:
892 return error;
893}
894
/*
 * Module exit: tear everything down in the reverse order of
 * init_xfs_fs.
 */
STATIC void __exit
exit_xfs_fs( void )
{
	vfs_exitquota();
	XFS_DM_EXIT(&xfs_fs_type);
	unregister_filesystem(&xfs_fs_type);
	xfs_cleanup();
	pagebuf_terminate();
	destroy_inodecache();
	ktrace_uninit();
}
906
907module_init(init_xfs_fs);
908module_exit(exit_xfs_fs);
909
910MODULE_AUTHOR("Silicon Graphics, Inc.");
911MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
912MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
new file mode 100644
index 000000000000..ec7e0035c731
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -0,0 +1,138 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
#ifndef __XFS_SUPER_H__
#define __XFS_SUPER_H__

/*
 * Config-dependent glue for the superblock layer: optional DMAPI and
 * quota hooks compile to no-ops when the corresponding feature is off.
 *
 * NOTE(review): vfs_insertdmapi/vfs_insertquota expand the hard-coded
 * identifier `vfsp`, not their macro parameter -- callers must have a
 * variable of exactly that name in scope; confirm this is intended.
 */
#ifdef CONFIG_XFS_DMAPI
# define vfs_insertdmapi(vfs)	vfs_insertops(vfsp, &xfs_dmops)
# define vfs_initdmapi()	dmapi_init()
# define vfs_exitdmapi()	dmapi_uninit()
#else
# define vfs_insertdmapi(vfs)	do { } while (0)
# define vfs_initdmapi()	do { } while (0)
# define vfs_exitdmapi()	do { } while (0)
#endif

#ifdef CONFIG_XFS_QUOTA
# define vfs_insertquota(vfs)	vfs_insertops(vfsp, &xfs_qmops)
extern void xfs_qm_init(void);
extern void xfs_qm_exit(void);
# define vfs_initquota()	xfs_qm_init()
# define vfs_exitquota()	xfs_qm_exit()
#else
# define vfs_insertquota(vfs)	do { } while (0)
# define vfs_initquota()	do { } while (0)
# define vfs_exitquota()	do { } while (0)
#endif

#ifdef CONFIG_XFS_POSIX_ACL
# define XFS_ACL_STRING		"ACLs, "
# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
#else
# define XFS_ACL_STRING
# define set_posix_acl_flag(sb)	do { } while (0)
#endif

#ifdef CONFIG_XFS_SECURITY
# define XFS_SECURITY_STRING	"security attributes, "
# define ENOSECURITY		0
#else
# define XFS_SECURITY_STRING
# define ENOSECURITY		EOPNOTSUPP
#endif

#ifdef CONFIG_XFS_RT
# define XFS_REALTIME_STRING	"realtime, "
#else
# define XFS_REALTIME_STRING
#endif

#if XFS_BIG_BLKNOS
# if XFS_BIG_INUMS
#  define XFS_BIGFS_STRING	"large block/inode numbers, "
# else
#  define XFS_BIGFS_STRING	"large block numbers, "
# endif
#else
# define XFS_BIGFS_STRING
#endif

#ifdef CONFIG_XFS_TRACE
# define XFS_TRACE_STRING	"tracing, "
#else
# define XFS_TRACE_STRING
#endif

#ifdef CONFIG_XFS_DMAPI
# define XFS_DMAPI_STRING	"dmapi support, "
#else
# define XFS_DMAPI_STRING
#endif

#ifdef DEBUG
# define XFS_DBG_STRING		"debug"
#else
# define XFS_DBG_STRING		"no debug"
#endif

/* Feature summary printed at module load (see init_xfs_fs). */
#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
				XFS_SECURITY_STRING \
				XFS_REALTIME_STRING \
				XFS_BIGFS_STRING \
				XFS_TRACE_STRING \
				XFS_DMAPI_STRING \
				XFS_DBG_STRING /* DBG must be last */

/* The XFS vfs lives in super_block.s_fs_info. */
#define LINVFS_GET_VFS(s) \
	(vfs_t *)((s)->s_fs_info)
#define LINVFS_SET_VFS(s, vfsp) \
	((s)->s_fs_info = vfsp)

struct xfs_inode;
struct xfs_mount;
struct xfs_buftarg;
struct block_device;

extern __uint64_t xfs_max_file_offset(unsigned int);

extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);

extern void xfs_flush_inode(struct xfs_inode *);
extern void xfs_flush_device(struct xfs_inode *);

extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
				struct block_device **);
extern void xfs_blkdev_put(struct block_device *);

extern struct export_operations linvfs_export_ops;

#endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
new file mode 100644
index 000000000000..0dc010356f4d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -0,0 +1,174 @@
1/*
2 * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_rw.h"
35#include <linux/sysctl.h>
36#include <linux/proc_fs.h>
37
38
39static struct ctl_table_header *xfs_table_header;
40
41
#ifdef CONFIG_PROC_FS
/*
 * sysctl handler for fs.xfs.stats_clear: writing a nonzero value zeros
 * the per-CPU xfsstats counters (except vn_active, which tracks live
 * vnodes and must survive), then resets the sysctl back to 0.
 */
STATIC int
xfs_stats_clear_proc_handler(
	ctl_table	*ctl,
	int		write,
	struct file	*filp,
	void __user	*buffer,
	size_t		*lenp,
	loff_t		*ppos)
{
	int		c, ret, *valp = ctl->data;
	__uint32_t	vn_active;

	ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);

	if (!ret && write && *valp) {
		printk("XFS Clearing xfsstats\n");
		for (c = 0; c < NR_CPUS; c++) {
			if (!cpu_possible(c)) continue;
			/* no cross-CPU migration while touching this
			 * CPU slot */
			preempt_disable();
			/* save vn_active, it's a universal truth! */
			vn_active = per_cpu(xfsstats, c).vn_active;
			memset(&per_cpu(xfsstats, c), 0,
			       sizeof(struct xfsstats));
			per_cpu(xfsstats, c).vn_active = vn_active;
			preempt_enable();
		}
		xfs_stats_clear = 0;
	}

	return ret;
}
#endif /* CONFIG_PROC_FS */
75
/*
 * The fs.xfs.* sysctl tunables.  Each entry is
 * {binary id, name, data, maxlen, mode, child, proc_handler, strategy,
 *  de, extra1 (min), extra2 (max)}; values are clamped to the
 * [min, max] range declared alongside each xfs_sysctl_val_t.
 */
STATIC ctl_table xfs_table[] = {
	{XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},

	{XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},

	{XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},

	{XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.panic_mask.min, &xfs_params.panic_mask.max},

	{XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.error_level.min, &xfs_params.error_level.max},

	{XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},

	{XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},

	{XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},

	{XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},

	{XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},

	{XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},

	{XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},

	{XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
	&sysctl_intvec, NULL,
	&xfs_params.rotorstep.min, &xfs_params.rotorstep.max},

	/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
	/* stats_clear uses a custom handler to zero the per-CPU stats */
	{XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
	sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
	&sysctl_intvec, NULL,
	&xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
#endif /* CONFIG_PROC_FS */

	{0}
};
152
/* Intermediate "xfs" directory under fs/ in the sysctl tree. */
STATIC ctl_table xfs_dir_table[] = {
	{FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
	{0}
};
157
/* Root "fs" entry anchoring the fs.xfs.* sysctl hierarchy. */
STATIC ctl_table xfs_root_table[] = {
	{CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table},
	{0}
};
162
/*
 * Register the fs.xfs.* sysctl tree; the returned header is kept for
 * unregistration.  A NULL result (registration failed) is tolerated --
 * the tunables simply will not appear.
 */
void
xfs_sysctl_register(void)
{
	xfs_table_header = register_sysctl_table(xfs_root_table, 1);
}
168
169void
170xfs_sysctl_unregister(void)
171{
172 if (xfs_table_header)
173 unregister_sysctl_table(xfs_table_header);
174}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
new file mode 100644
index 000000000000..a39a95020a58
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#ifndef __XFS_SYSCTL_H__
34#define __XFS_SYSCTL_H__
35
36#include <linux/sysctl.h>
37
38/*
39 * Tunable xfs parameters
40 */
41
42typedef struct xfs_sysctl_val {
43 int min;
44 int val;
45 int max;
46} xfs_sysctl_val_t;
47
48typedef struct xfs_param {
49 xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
50 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
51 * not a member of parent dir GID. */
52 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
53 xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
54 xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
55 xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
56 xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */
57 xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */
58 xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
59 xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
60 xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
61 xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
62 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
63 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
64} xfs_param_t;
65
66/*
67 * xfs_error_level:
68 *
69 * How much error reporting will be done when internal problems are
70 * encountered. These problems normally return an EFSCORRUPTED to their
71 * caller, with no other information reported.
72 *
73 * 0 No error reports
74 * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown
75 * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any
76 * additional errors that are known to not cause shutdowns)
77 *
78 * xfs_panic_mask bit 0x8 turns the error reports into panics
79 */
80
81enum {
82 /* XFS_REFCACHE_SIZE = 1 */
83 /* XFS_REFCACHE_PURGE = 2 */
84 XFS_RESTRICT_CHOWN = 3,
85 XFS_SGID_INHERIT = 4,
86 XFS_SYMLINK_MODE = 5,
87 XFS_PANIC_MASK = 6,
88 XFS_ERRLEVEL = 7,
89 XFS_SYNCD_TIMER = 8,
90 /* XFS_PROBE_DMAPI = 9 */
91 /* XFS_PROBE_IOOPS = 10 */
92 /* XFS_PROBE_QUOTA = 11 */
93 XFS_STATS_CLEAR = 12,
94 XFS_INHERIT_SYNC = 13,
95 XFS_INHERIT_NODUMP = 14,
96 XFS_INHERIT_NOATIME = 15,
97 XFS_BUF_TIMER = 16,
98 XFS_BUF_AGE = 17,
99 /* XFS_IO_BYPASS = 18 */
100 XFS_INHERIT_NOSYM = 19,
101 XFS_ROTORSTEP = 20,
102};
103
104extern xfs_param_t xfs_params;
105
106#ifdef CONFIG_SYSCTL
107extern void xfs_sysctl_register(void);
108extern void xfs_sysctl_unregister(void);
109#else
110# define xfs_sysctl_register() do { } while (0)
111# define xfs_sysctl_unregister() do { } while (0)
112#endif /* CONFIG_SYSCTL */
113
114#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
new file mode 100644
index 000000000000..96f96394417e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_version.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * Dummy file that can contain a timestamp to put into the
35 * XFS init string, to help users keep track of what they're
36 * running
37 */
38
39#ifndef __XFS_VERSION_H__
40#define __XFS_VERSION_H__
41
42#define XFS_VERSION_STRING "SGI XFS"
43
44#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
new file mode 100644
index 000000000000..669c61644959
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34#include "xfs_fs.h"
35#include "xfs_macros.h"
36#include "xfs_inum.h"
37#include "xfs_log.h"
38#include "xfs_clnt.h"
39#include "xfs_trans.h"
40#include "xfs_sb.h"
41#include "xfs_ag.h"
42#include "xfs_dir.h"
43#include "xfs_dir2.h"
44#include "xfs_imap.h"
45#include "xfs_alloc.h"
46#include "xfs_dmapi.h"
47#include "xfs_mount.h"
48#include "xfs_quota.h"
49
50int
51vfs_mount(
52 struct bhv_desc *bdp,
53 struct xfs_mount_args *args,
54 struct cred *cr)
55{
56 struct bhv_desc *next = bdp;
57
58 ASSERT(next);
59 while (! (bhvtovfsops(next))->vfs_mount)
60 next = BHV_NEXT(next);
61 return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
62}
63
64int
65vfs_parseargs(
66 struct bhv_desc *bdp,
67 char *s,
68 struct xfs_mount_args *args,
69 int f)
70{
71 struct bhv_desc *next = bdp;
72
73 ASSERT(next);
74 while (! (bhvtovfsops(next))->vfs_parseargs)
75 next = BHV_NEXT(next);
76 return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
77}
78
79int
80vfs_showargs(
81 struct bhv_desc *bdp,
82 struct seq_file *m)
83{
84 struct bhv_desc *next = bdp;
85
86 ASSERT(next);
87 while (! (bhvtovfsops(next))->vfs_showargs)
88 next = BHV_NEXT(next);
89 return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
90}
91
92int
93vfs_unmount(
94 struct bhv_desc *bdp,
95 int fl,
96 struct cred *cr)
97{
98 struct bhv_desc *next = bdp;
99
100 ASSERT(next);
101 while (! (bhvtovfsops(next))->vfs_unmount)
102 next = BHV_NEXT(next);
103 return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
104}
105
106int
107vfs_mntupdate(
108 struct bhv_desc *bdp,
109 int *fl,
110 struct xfs_mount_args *args)
111{
112 struct bhv_desc *next = bdp;
113
114 ASSERT(next);
115 while (! (bhvtovfsops(next))->vfs_mntupdate)
116 next = BHV_NEXT(next);
117 return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args));
118}
119
120int
121vfs_root(
122 struct bhv_desc *bdp,
123 struct vnode **vpp)
124{
125 struct bhv_desc *next = bdp;
126
127 ASSERT(next);
128 while (! (bhvtovfsops(next))->vfs_root)
129 next = BHV_NEXT(next);
130 return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
131}
132
133int
134vfs_statvfs(
135 struct bhv_desc *bdp,
136 xfs_statfs_t *sp,
137 struct vnode *vp)
138{
139 struct bhv_desc *next = bdp;
140
141 ASSERT(next);
142 while (! (bhvtovfsops(next))->vfs_statvfs)
143 next = BHV_NEXT(next);
144 return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
145}
146
147int
148vfs_sync(
149 struct bhv_desc *bdp,
150 int fl,
151 struct cred *cr)
152{
153 struct bhv_desc *next = bdp;
154
155 ASSERT(next);
156 while (! (bhvtovfsops(next))->vfs_sync)
157 next = BHV_NEXT(next);
158 return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
159}
160
161int
162vfs_vget(
163 struct bhv_desc *bdp,
164 struct vnode **vpp,
165 struct fid *fidp)
166{
167 struct bhv_desc *next = bdp;
168
169 ASSERT(next);
170 while (! (bhvtovfsops(next))->vfs_vget)
171 next = BHV_NEXT(next);
172 return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
173}
174
175int
176vfs_dmapiops(
177 struct bhv_desc *bdp,
178 caddr_t addr)
179{
180 struct bhv_desc *next = bdp;
181
182 ASSERT(next);
183 while (! (bhvtovfsops(next))->vfs_dmapiops)
184 next = BHV_NEXT(next);
185 return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
186}
187
188int
189vfs_quotactl(
190 struct bhv_desc *bdp,
191 int cmd,
192 int id,
193 caddr_t addr)
194{
195 struct bhv_desc *next = bdp;
196
197 ASSERT(next);
198 while (! (bhvtovfsops(next))->vfs_quotactl)
199 next = BHV_NEXT(next);
200 return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
201}
202
203void
204vfs_init_vnode(
205 struct bhv_desc *bdp,
206 struct vnode *vp,
207 struct bhv_desc *bp,
208 int unlock)
209{
210 struct bhv_desc *next = bdp;
211
212 ASSERT(next);
213 while (! (bhvtovfsops(next))->vfs_init_vnode)
214 next = BHV_NEXT(next);
215 ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
216}
217
218void
219vfs_force_shutdown(
220 struct bhv_desc *bdp,
221 int fl,
222 char *file,
223 int line)
224{
225 struct bhv_desc *next = bdp;
226
227 ASSERT(next);
228 while (! (bhvtovfsops(next))->vfs_force_shutdown)
229 next = BHV_NEXT(next);
230 ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
231}
232
233void
234vfs_freeze(
235 struct bhv_desc *bdp)
236{
237 struct bhv_desc *next = bdp;
238
239 ASSERT(next);
240 while (! (bhvtovfsops(next))->vfs_freeze)
241 next = BHV_NEXT(next);
242 ((*bhvtovfsops(next)->vfs_freeze)(next));
243}
244
245vfs_t *
246vfs_allocate( void )
247{
248 struct vfs *vfsp;
249
250 vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
252 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
253 spin_lock_init(&vfsp->vfs_sync_lock);
254 init_waitqueue_head(&vfsp->vfs_wait_sync_task);
255 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
256 return vfsp;
257}
258
259void
260vfs_deallocate(
261 struct vfs *vfsp)
262{
263 bhv_head_destroy(VFS_BHVHEAD(vfsp));
264 kmem_free(vfsp, sizeof(vfs_t));
265}
266
267void
268vfs_insertops(
269 struct vfs *vfsp,
270 struct bhv_vfsops *vfsops)
271{
272 struct bhv_desc *bdp;
273
274 bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
275 bhv_desc_init(bdp, NULL, vfsp, vfsops);
276 bhv_insert(&vfsp->vfs_bh, bdp);
277}
278
279void
280vfs_insertbhv(
281 struct vfs *vfsp,
282 struct bhv_desc *bdp,
283 struct vfsops *vfsops,
284 void *mount)
285{
286 bhv_desc_init(bdp, mount, vfsp, vfsops);
287 bhv_insert_initial(&vfsp->vfs_bh, bdp);
288}
289
290void
291bhv_remove_vfsops(
292 struct vfs *vfsp,
293 int pos)
294{
295 struct bhv_desc *bhv;
296
297 bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
298 if (!bhv)
299 return;
300 bhv_remove(&vfsp->vfs_bh, bhv);
301 kmem_free(bhv, sizeof(*bhv));
302}
303
304void
305bhv_remove_all_vfsops(
306 struct vfs *vfsp,
307 int freebase)
308{
309 struct xfs_mount *mp;
310
311 bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
312 bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
313 if (!freebase)
314 return;
315 mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
316 VFS_REMOVEBHV(vfsp, &mp->m_bhv);
317 xfs_mount_free(mp, 0);
318}
319
320void
321bhv_insert_all_vfsops(
322 struct vfs *vfsp)
323{
324 struct xfs_mount *mp;
325
326 mp = xfs_mount_init();
327 vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
328 vfs_insertdmapi(vfsp);
329 vfs_insertquota(vfsp);
330}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
new file mode 100644
index 000000000000..76493991578f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_VFS_H__
33#define __XFS_VFS_H__
34
35#include <linux/vfs.h>
36#include "xfs_fs.h"
37
38struct fid;
39struct vfs;
40struct cred;
41struct vnode;
42struct kstatfs;
43struct seq_file;
44struct super_block;
45struct xfs_mount_args;
46
47typedef struct kstatfs xfs_statfs_t;
48
49typedef struct vfs_sync_work {
50 struct list_head w_list;
51 struct vfs *w_vfs;
52 void *w_data; /* syncer routine argument */
53 void (*w_syncer)(struct vfs *, void *);
54} vfs_sync_work_t;
55
56typedef struct vfs {
57 u_int vfs_flag; /* flags */
58 xfs_fsid_t vfs_fsid; /* file system ID */
59 xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */
60 bhv_head_t vfs_bh; /* head of vfs behavior chain */
61 struct super_block *vfs_super; /* generic superblock pointer */
62 struct task_struct *vfs_sync_task; /* generalised sync thread */
63 vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */
64 struct list_head vfs_sync_list; /* sync thread work item list */
65 spinlock_t vfs_sync_lock; /* work item list lock */
66 int vfs_sync_seq; /* sync thread generation no. */
67 wait_queue_head_t vfs_wait_single_sync_task;
68 wait_queue_head_t vfs_wait_sync_task;
69} vfs_t;
70
71#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
72
73#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) )
74#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) )
75#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh )
76#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
77
78#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
79#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */
80#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
81
82typedef enum {
83 VFS_BHV_UNKNOWN, /* not specified */
84 VFS_BHV_XFS, /* xfs */
85 VFS_BHV_DM, /* data migration */
86 VFS_BHV_QM, /* quota manager */
87 VFS_BHV_IO, /* IO path */
88 VFS_BHV_END /* housekeeping end-of-range */
89} vfs_bhv_t;
90
91#define VFS_POSITION_XFS (BHV_POSITION_BASE)
92#define VFS_POSITION_DM (VFS_POSITION_BASE+10)
93#define VFS_POSITION_QM (VFS_POSITION_BASE+20)
94#define VFS_POSITION_IO (VFS_POSITION_BASE+30)
95
96#define VFS_RDONLY 0x0001 /* read-only vfs */
97#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
98#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
99#define VFS_UMOUNT 0x0008 /* unmount in progress */
100#define VFS_END 0x0008 /* max flag */
101
102#define SYNC_ATTR 0x0001 /* sync attributes */
103#define SYNC_CLOSE 0x0002 /* close file system down */
104#define SYNC_DELWRI 0x0004 /* look at delayed writes */
105#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
106#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
107#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
108#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
109#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
110
111typedef int (*vfs_mount_t)(bhv_desc_t *,
112 struct xfs_mount_args *, struct cred *);
113typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *,
114 struct xfs_mount_args *, int);
115typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
116typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
117typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *,
118 struct xfs_mount_args *);
119typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **);
120typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
121typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
122typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
123typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
124typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
125typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
126 struct vnode *, bhv_desc_t *, int);
127typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
128typedef void (*vfs_freeze_t)(bhv_desc_t *);
129
130typedef struct vfsops {
131 bhv_position_t vf_position; /* behavior chain position */
132 vfs_mount_t vfs_mount; /* mount file system */
133 vfs_parseargs_t vfs_parseargs; /* parse mount options */
134 vfs_showargs_t vfs_showargs; /* unparse mount options */
135 vfs_unmount_t vfs_unmount; /* unmount file system */
136 vfs_mntupdate_t vfs_mntupdate; /* update file system options */
137 vfs_root_t vfs_root; /* get root vnode */
138 vfs_statvfs_t vfs_statvfs; /* file system statistics */
139 vfs_sync_t vfs_sync; /* flush files */
140 vfs_vget_t vfs_vget; /* get vnode from fid */
141 vfs_dmapiops_t vfs_dmapiops; /* data migration */
142 vfs_quotactl_t vfs_quotactl; /* disk quota */
143 vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
144 vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
145 vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
146} vfsops_t;
147
148/*
149 * VFS's. Operates on vfs structure pointers (starts at bhv head).
150 */
151#define VHEAD(v) ((v)->vfs_fbhv)
152#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr))
153#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
154#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m))
155#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr))
156#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args))
157#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp))
158#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp))
159#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr))
160#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp))
161#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p))
162#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
163#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
164#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
165#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) )
166
167/*
168 * PVFS's. Operates on behavior descriptor pointers.
169 */
170#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr))
171#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f))
172#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m))
173#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr))
174#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args))
175#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp))
176#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp))
177#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr))
178#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp))
179#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p))
180#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p))
181#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) )
182#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
183#define PVFS_FREEZE(b) ( vfs_freeze(b) )
184
185extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
186extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
187extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
188extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
189extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
190extern int vfs_root(bhv_desc_t *, struct vnode **);
191extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
192extern int vfs_sync(bhv_desc_t *, int, struct cred *);
193extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
194extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
195extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
196extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
197extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
198extern void vfs_freeze(bhv_desc_t *);
199
200typedef struct bhv_vfsops {
201 struct vfsops bhv_common;
202 void * bhv_custom;
203} bhv_vfsops_t;
204
205#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
206#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
207#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o))
208#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL )
209
210extern vfs_t *vfs_allocate(void);
211extern void vfs_deallocate(vfs_t *);
212extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
213extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
214
215extern void bhv_insert_all_vfsops(struct vfs *);
216extern void bhv_remove_all_vfsops(struct vfs *, int);
217extern void bhv_remove_vfsops(struct vfs *, int);
218
219#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen)
220#define fs_check_frozen(vfsp, level) \
221 vfs_check_frozen(vfsp->vfs_super, level);
222
223#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
new file mode 100644
index 000000000000..849c61c74f3c
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -0,0 +1,455 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include "xfs.h"
34
35
36uint64_t vn_generation; /* vnode generation number */
37DEFINE_SPINLOCK(vnumber_lock);
38
39/*
40 * Dedicated vnode inactive/reclaim sync semaphores.
41 * Prime number of hash buckets since address is used as the key.
42 */
43#define NVSYNC 37
44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
45sv_t vsync[NVSYNC];
46
47/*
48 * Translate stat(2) file types to vnode types and vice versa.
49 * Aware of numeric order of S_IFMT and vnode type values.
50 */
51enum vtype iftovt_tab[] = {
52 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
53 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
54};
55
56u_short vttoif_tab[] = {
57 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
58};
59
60
61void
62vn_init(void)
63{
64 register sv_t *svp;
65 register int i;
66
67 for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
68 init_sv(svp, SV_DEFAULT, "vsy", i);
69}
70
71/*
72 * Clean a vnode of filesystem-specific data and prepare it for reuse.
73 */
74STATIC int
75vn_reclaim(
76 struct vnode *vp)
77{
78 int error;
79
80 XFS_STATS_INC(vn_reclaim);
81 vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
82
83 /*
84 * Only make the VOP_RECLAIM call if there are behaviors
85 * to call.
86 */
87 if (vp->v_fbhv) {
88 VOP_RECLAIM(vp, error);
89 if (error)
90 return -error;
91 }
92 ASSERT(vp->v_fbhv == NULL);
93
94 VN_LOCK(vp);
95 vp->v_flag &= (VRECLM|VWAIT);
96 VN_UNLOCK(vp, 0);
97
98 vp->v_type = VNON;
99 vp->v_fbhv = NULL;
100
101#ifdef XFS_VNODE_TRACE
102 ktrace_free(vp->v_trace);
103 vp->v_trace = NULL;
104#endif
105
106 return 0;
107}
108
109STATIC void
110vn_wakeup(
111 struct vnode *vp)
112{
113 VN_LOCK(vp);
114 if (vp->v_flag & VWAIT)
115 sv_broadcast(vptosync(vp));
116 vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
117 VN_UNLOCK(vp, 0);
118}
119
120int
121vn_wait(
122 struct vnode *vp)
123{
124 VN_LOCK(vp);
125 if (vp->v_flag & (VINACT | VRECLM)) {
126 vp->v_flag |= VWAIT;
127 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
128 return 1;
129 }
130 VN_UNLOCK(vp, 0);
131 return 0;
132}
133
134struct vnode *
135vn_initialize(
136 struct inode *inode)
137{
138 struct vnode *vp = LINVFS_GET_VP(inode);
139
140 XFS_STATS_INC(vn_active);
141 XFS_STATS_INC(vn_alloc);
142
143 vp->v_flag = VMODIFIED;
144 spinlock_init(&vp->v_lock, "v_lock");
145
146 spin_lock(&vnumber_lock);
147 if (!++vn_generation) /* v_number shouldn't be zero */
148 vn_generation++;
149 vp->v_number = vn_generation;
150 spin_unlock(&vnumber_lock);
151
152 ASSERT(VN_CACHED(vp) == 0);
153
154 /* Initialize the first behavior and the behavior chain head. */
155 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
156
157#ifdef XFS_VNODE_TRACE
158 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
159 printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace);
160#endif /* XFS_VNODE_TRACE */
161
162 vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
163 return vp;
164}
165
166/*
167 * Get a reference on a vnode.
168 */
169vnode_t *
170vn_get(
171 struct vnode *vp,
172 vmap_t *vmap)
173{
174 struct inode *inode;
175
176 XFS_STATS_INC(vn_get);
177 inode = LINVFS_GET_IP(vp);
178 if (inode->i_state & I_FREEING)
179 return NULL;
180
181 inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
182 if (!inode) /* Inode not present */
183 return NULL;
184
185 vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
186
187 return vp;
188}
189
190/*
191 * Revalidate the Linux inode from the vattr.
192 * Note: i_size _not_ updated; we must hold the inode
193 * semaphore when doing that - callers responsibility.
194 */
195void
196vn_revalidate_core(
197 struct vnode *vp,
198 vattr_t *vap)
199{
200 struct inode *inode = LINVFS_GET_IP(vp);
201
202 inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode;
203 inode->i_nlink = vap->va_nlink;
204 inode->i_uid = vap->va_uid;
205 inode->i_gid = vap->va_gid;
206 inode->i_blocks = vap->va_nblocks;
207 inode->i_mtime = vap->va_mtime;
208 inode->i_ctime = vap->va_ctime;
209 inode->i_atime = vap->va_atime;
210 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
211 inode->i_flags |= S_IMMUTABLE;
212 else
213 inode->i_flags &= ~S_IMMUTABLE;
214 if (vap->va_xflags & XFS_XFLAG_APPEND)
215 inode->i_flags |= S_APPEND;
216 else
217 inode->i_flags &= ~S_APPEND;
218 if (vap->va_xflags & XFS_XFLAG_SYNC)
219 inode->i_flags |= S_SYNC;
220 else
221 inode->i_flags &= ~S_SYNC;
222 if (vap->va_xflags & XFS_XFLAG_NOATIME)
223 inode->i_flags |= S_NOATIME;
224 else
225 inode->i_flags &= ~S_NOATIME;
226}
227
228/*
229 * Revalidate the Linux inode from the vnode.
230 */
231int
232vn_revalidate(
233 struct vnode *vp)
234{
235 vattr_t va;
236 int error;
237
238 vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
239 ASSERT(vp->v_fbhv != NULL);
240
241 va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
242 VOP_GETATTR(vp, &va, 0, NULL, error);
243 if (!error) {
244 vn_revalidate_core(vp, &va);
245 VUNMODIFY(vp);
246 }
247 return -error;
248}
249
250/*
251 * purge a vnode from the cache
252 * At this point the vnode is guaranteed to have no references (vn_count == 0)
253 * The caller has to make sure that there are no ways someone could
254 * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
255 */
256void
257vn_purge(
258 struct vnode *vp,
259 vmap_t *vmap)
260{
261 vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
262
263again:
264 /*
265 * Check whether vp has already been reclaimed since our caller
266 * sampled its version while holding a filesystem cache lock that
267 * its VOP_RECLAIM function acquires.
268 */
269 VN_LOCK(vp);
270 if (vp->v_number != vmap->v_number) {
271 VN_UNLOCK(vp, 0);
272 return;
273 }
274
275 /*
276 * If vp is being reclaimed or inactivated, wait until it is inert,
277 * then proceed. Can't assume that vnode is actually reclaimed
278 * just because the reclaimed flag is asserted -- a vn_alloc
279 * reclaim can fail.
280 */
281 if (vp->v_flag & (VINACT | VRECLM)) {
282 ASSERT(vn_count(vp) == 0);
283 vp->v_flag |= VWAIT;
284 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
285 goto again;
286 }
287
288 /*
289 * Another process could have raced in and gotten this vnode...
290 */
291 if (vn_count(vp) > 0) {
292 VN_UNLOCK(vp, 0);
293 return;
294 }
295
296 XFS_STATS_DEC(vn_active);
297 vp->v_flag |= VRECLM;
298 VN_UNLOCK(vp, 0);
299
300 /*
301 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
302 * vp's filesystem to flush and invalidate all cached resources.
303 * When vn_reclaim returns, vp should have no private data,
304 * either in a system cache or attached to v_data.
305 */
306 if (vn_reclaim(vp) != 0)
307 panic("vn_purge: cannot reclaim");
308
309 /*
310 * Wakeup anyone waiting for vp to be reclaimed.
311 */
312 vn_wakeup(vp);
313}
314
315/*
316 * Add a reference to a referenced vnode.
317 */
318struct vnode *
319vn_hold(
320 struct vnode *vp)
321{
322 struct inode *inode;
323
324 XFS_STATS_INC(vn_hold);
325
326 VN_LOCK(vp);
327 inode = igrab(LINVFS_GET_IP(vp));
328 ASSERT(inode);
329 VN_UNLOCK(vp, 0);
330
331 return vp;
332}
333
334/*
335 * Call VOP_INACTIVE on last reference.
336 */
337void
338vn_rele(
339 struct vnode *vp)
340{
341 int vcnt;
342 int cache;
343
344 XFS_STATS_INC(vn_rele);
345
346 VN_LOCK(vp);
347
348 vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
349 vcnt = vn_count(vp);
350
351 /*
352 * Since we always get called from put_inode we know
353 * that i_count won't be decremented after we
354 * return.
355 */
356 if (!vcnt) {
357 /*
358 * As soon as we turn this on, noone can find us in vn_get
359 * until we turn off VINACT or VRECLM
360 */
361 vp->v_flag |= VINACT;
362 VN_UNLOCK(vp, 0);
363
364 /*
365 * Do not make the VOP_INACTIVE call if there
366 * are no behaviors attached to the vnode to call.
367 */
368 if (vp->v_fbhv)
369 VOP_INACTIVE(vp, NULL, cache);
370
371 VN_LOCK(vp);
372 if (vp->v_flag & VWAIT)
373 sv_broadcast(vptosync(vp));
374
375 vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
376 }
377
378 VN_UNLOCK(vp, 0);
379
380 vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
381}
382
383/*
384 * Finish the removal of a vnode.
385 */
386void
387vn_remove(
388 struct vnode *vp)
389{
390 vmap_t vmap;
391
392 /* Make sure we don't do this to the same vnode twice */
393 if (!(vp->v_fbhv))
394 return;
395
396 XFS_STATS_INC(vn_remove);
397 vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
398
399 /*
400 * After the following purge the vnode
401 * will no longer exist.
402 */
403 VMAP(vp, vmap);
404 vn_purge(vp, &vmap);
405}
406
407
408#ifdef XFS_VNODE_TRACE
409
410#define KTRACE_ENTER(vp, vk, s, line, ra) \
411 ktrace_enter( (vp)->v_trace, \
412/* 0 */ (void *)(__psint_t)(vk), \
413/* 1 */ (void *)(s), \
414/* 2 */ (void *)(__psint_t) line, \
415/* 3 */ (void *)(vn_count(vp)), \
416/* 4 */ (void *)(ra), \
417/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \
418/* 6 */ (void *)(__psint_t)current_cpu(), \
419/* 7 */ (void *)(__psint_t)current_pid(), \
420/* 8 */ (void *)__return_address, \
421/* 9 */ 0, 0, 0, 0, 0, 0, 0)
422
423/*
424 * Vnode tracing code.
425 */
426void
427vn_trace_entry(vnode_t *vp, char *func, inst_t *ra)
428{
429 KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
430}
431
432void
433vn_trace_exit(vnode_t *vp, char *func, inst_t *ra)
434{
435 KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
436}
437
438void
439vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
440{
441 KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
442}
443
444void
445vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
446{
447 KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
448}
449
450void
451vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
452{
453 KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
454}
455#endif /* XFS_VNODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
new file mode 100644
index 000000000000..da76c1f1e11c
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -0,0 +1,666 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 *
32 * Portions Copyright (c) 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 */
#ifndef __XFS_VNODE_H__
#define __XFS_VNODE_H__

/* Opaque forward declarations; this header only uses pointers to these. */
struct uio;
struct file;
struct vattr;
struct xfs_iomap;
struct attrlist_cursor_kern;

/*
 * Vnode types. VNON means no type.
 */
enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };

typedef xfs_ino_t vnumber_t;
typedef struct dentry	vname_t;	/* a Linux dentry doubles as the name handle */
typedef bhv_head_t vn_bhv_head_t;

/*
 * MP locking protocols:
 *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
 *	v_type					read-only or fs-dependent
 */
typedef struct vnode {
	__u32		v_flag;			/* vnode flags (see below) */
	enum vtype	v_type;			/* vnode type */
	struct vfs	*v_vfsp;		/* ptr to containing VFS */
	vnumber_t	v_number;		/* in-core vnode number */
	vn_bhv_head_t	v_bh;			/* behavior head */
	spinlock_t	v_lock;			/* VN_LOCK/VN_UNLOCK */
	/* Linux inode embedded (not pointed to) so LINVFS_GET_VP can
	 * recover the vnode from an inode by address arithmetic. */
	struct inode	v_inode;		/* Linux inode */
#ifdef XFS_VNODE_TRACE
	struct ktrace	*v_trace;		/* trace header structure */
#endif
} vnode_t;
94
/* Shorthand accessors for the first behavior on the vnode's chain. */
#define v_fbhv			v_bh.bh_first	       /* first behavior */
#define v_fops			v_bh.bh_first->bd_ops  /* first behavior ops */

/* Behavior-chain insertion positions (bottom/top/sentinel). */
#define VNODE_POSITION_BASE	BHV_POSITION_BASE	/* chain bottom */
#define VNODE_POSITION_TOP	BHV_POSITION_TOP	/* chain top */
#define VNODE_POSITION_INVALID	BHV_POSITION_INVALID	/* invalid pos. num */

/* Identities of the modules that may stack on a vnode's behavior chain. */
typedef enum {
	VN_BHV_UNKNOWN,		/* not specified */
	VN_BHV_XFS,		/* xfs */
	VN_BHV_DM,		/* data migration */
	VN_BHV_QM,		/* quota manager */
	VN_BHV_IO,		/* IO path */
	VN_BHV_END		/* housekeeping end-of-range */
} vn_bhv_t;

/* Fixed chain positions per module; higher numbers sit nearer the top. */
#define VNODE_POSITION_XFS	(VNODE_POSITION_BASE)
#define VNODE_POSITION_DM	(VNODE_POSITION_BASE+10)
#define VNODE_POSITION_QM	(VNODE_POSITION_BASE+20)
#define VNODE_POSITION_IO	(VNODE_POSITION_BASE+30)

/*
 * Macros for dealing with the behavior descriptor inside of the vnode.
 */
#define BHV_TO_VNODE(bdp)	((vnode_t *)BHV_VOBJ(bdp))
#define BHV_TO_VNODE_NULL(bdp)	((vnode_t *)BHV_VOBJNULL(bdp))

#define VN_BHV_HEAD(vp)			((bhv_head_t *)(&((vp)->v_bh)))
#define vn_bhv_head_init(bhp,name)	bhv_head_init(bhp,name)
#define vn_bhv_remove(bhp,bdp)		bhv_remove(bhp,bdp)
#define vn_bhv_lookup(bhp,ops)		bhv_lookup(bhp,ops)
#define	vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)

/*
 * Vnode to Linux inode mapping.
 * GET_VP uses list_entry (container_of) on the embedded v_inode member;
 * GET_IP is just the address of that member.
 */
#define LINVFS_GET_VP(inode)	((vnode_t *)list_entry(inode, vnode_t, v_inode))
#define LINVFS_GET_IP(vp)	(&(vp)->v_inode)

/*
 * Convert between vnode types and inode formats (since POSIX.1
 * defines mode word of stat structure in terms of inode formats).
 */
extern enum vtype	iftovt_tab[];
extern u_short		vttoif_tab[];
#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
#define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
143
144
/*
 * Vnode flags (v_flag).  Protected by VN_LOCK/VN_UNLOCK per the MP
 * locking protocol documented on struct vnode.
 */
#define VINACT		       0x1	/* vnode is being inactivated	*/
#define VRECLM		       0x2	/* vnode is being reclaimed	*/
#define VWAIT		       0x4	/* waiting for VINACT/VRECLM to end */
#define VMODIFIED	       0x8	/* XFS inode state possibly differs */
					/* to the Linux inode state.	*/

/*
 * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
 * The TRY variants are non-blocking attempts.
 */
typedef enum vrwlock {
	VRWLOCK_NONE,
	VRWLOCK_READ,
	VRWLOCK_WRITE,
	VRWLOCK_WRITE_DIRECT,
	VRWLOCK_TRY_READ,
	VRWLOCK_TRY_WRITE
} vrwlock_t;

/*
 * Return values for VOP_INACTIVE. A return value of
 * VN_INACTIVE_NOCACHE implies that the file system behavior
 * has disassociated its state and bhv_desc_t from the vnode.
 */
#define	VN_INACTIVE_CACHE	0
#define	VN_INACTIVE_NOCACHE	1

/*
 * Values for the cmd code given to VOP_VNODE_CHANGE.
 */
typedef enum vchange {
	VCHANGE_FLAGS_FRLOCKS		= 0,
	VCHANGE_FLAGS_ENF_LOCKING	= 1,
	VCHANGE_FLAGS_TRUNCATED		= 2,
	VCHANGE_FLAGS_PAGE_DIRTY	= 3,
	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
} vchange_t;
184
185
/*
 * Function-pointer types for each vnode operation.  Every op takes the
 * behavior descriptor (bhv_desc_t *) as its first argument; the VOP_*
 * macros below supply (vp)->v_fbhv for it.
 */
typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, int, struct cred *);
typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, int, struct cred *);
typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
				loff_t *, int, size_t, read_actor_t,
				void *, struct cred *);
typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
				int, unsigned int, void __user *);
typedef int	(*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
				struct cred *);
typedef int	(*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
				struct cred *);
typedef int	(*vop_access_t)(bhv_desc_t *, int, struct cred *);
typedef int	(*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
				int, vnode_t *, struct cred *);
typedef int	(*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
				vnode_t **, struct cred *);
typedef int	(*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
typedef int	(*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
				struct cred *);
typedef int	(*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
				struct cred *);
typedef int	(*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
				vnode_t **, struct cred *);
typedef int	(*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
typedef int	(*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
				int *);
typedef int	(*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
				char *, vnode_t **, struct cred *);
typedef int	(*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
				struct cred *);
typedef int	(*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
				xfs_off_t, xfs_off_t);
typedef int	(*vop_inactive_t)(bhv_desc_t *, struct cred *);
typedef int	(*vop_fid2_t)(bhv_desc_t *, struct fid *);
typedef int	(*vop_release_t)(bhv_desc_t *);
typedef int	(*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
				struct xfs_iomap *, int *);
typedef int	(*vop_reclaim_t)(bhv_desc_t *);
typedef int	(*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
				struct cred *);
typedef	int	(*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int,
				struct cred *);
typedef	int	(*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *);
typedef	int	(*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
				struct attrlist_cursor_kern *, struct cred *);
typedef void	(*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
typedef void	(*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
typedef void	(*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
typedef void	(*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
typedef int	(*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
				uint64_t, int);
typedef int	(*vop_iflush_t)(bhv_desc_t *, int);


/*
 * The vnode operations vector.  One instance per behavior module;
 * the _VOP_ macro below dispatches through the first behavior's ops.
 */
typedef struct vnodeops {
	bhv_position_t	vn_position;	/* position within behavior chain */
	vop_open_t		vop_open;
	vop_read_t		vop_read;
	vop_write_t		vop_write;
	vop_sendfile_t		vop_sendfile;
	vop_ioctl_t		vop_ioctl;
	vop_getattr_t		vop_getattr;
	vop_setattr_t		vop_setattr;
	vop_access_t		vop_access;
	vop_lookup_t		vop_lookup;
	vop_create_t		vop_create;
	vop_remove_t		vop_remove;
	vop_link_t		vop_link;
	vop_rename_t		vop_rename;
	vop_mkdir_t		vop_mkdir;
	vop_rmdir_t		vop_rmdir;
	vop_readdir_t		vop_readdir;
	vop_symlink_t		vop_symlink;
	vop_readlink_t		vop_readlink;
	vop_fsync_t		vop_fsync;
	vop_inactive_t		vop_inactive;
	vop_fid2_t		vop_fid2;
	vop_rwlock_t		vop_rwlock;
	vop_rwunlock_t		vop_rwunlock;
	vop_bmap_t		vop_bmap;
	vop_reclaim_t		vop_reclaim;
	vop_attr_get_t		vop_attr_get;
	vop_attr_set_t		vop_attr_set;
	vop_attr_remove_t	vop_attr_remove;
	vop_attr_list_t		vop_attr_list;
	vop_link_removed_t	vop_link_removed;
	vop_vnode_change_t	vop_vnode_change;
	vop_ptossvp_t		vop_tosspages;
	vop_pflushinvalvp_t	vop_flushinval_pages;
	vop_pflushvp_t		vop_flush_pages;
	vop_release_t		vop_release;
	vop_iflush_t		vop_iflush;
} vnodeops_t;
286
/*
 * VOP's.  Each macro fetches the vnodeops vector of the vnode's first
 * behavior and invokes the named op, passing the behavior descriptor
 * as the first argument.  Ops returning a value assign it to 'rv'.
 */
#define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)

#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv)			\
	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv)		\
	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv)		\
	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
#define VOP_OPEN(vp, cr, rv)						\
	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
#define VOP_GETATTR(vp, vap, f, cr, rv)					\
	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
#define	VOP_SETATTR(vp, vap, f, cr, rv)					\
	rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
#define	VOP_ACCESS(vp, mode, cr, rv)					\
	rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
#define	VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)				\
	rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
#define VOP_CREATE(dvp,d,vap,vpp,cr,rv)					\
	rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
#define VOP_REMOVE(dvp,d,cr,rv)						\
	rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
#define	VOP_LINK(tdvp,fvp,d,cr,rv)					\
	rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
#define	VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)				\
	rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
#define	VOP_MKDIR(dp,d,vap,vpp,cr,rv)					\
	rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
#define	VOP_RMDIR(dp,d,cr,rv)						\
	rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
#define	VOP_READDIR(vp,uiop,cr,eofp,rv)					\
	rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
#define	VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)				\
	rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
#define	VOP_READLINK(vp,uiop,fl,cr,rv)					\
	rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
#define	VOP_FSYNC(vp,f,cr,b,e,rv)					\
	rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
#define VOP_INACTIVE(vp, cr, rv)					\
	rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
#define VOP_RELEASE(vp, rv)						\
	rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
#define VOP_FID2(vp, fidp, rv)						\
	rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
#define VOP_RWLOCK(vp,i)						\
	(void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
#define VOP_RWLOCK_TRY(vp,i)						\
	_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
#define VOP_RWUNLOCK(vp,i)						\
	(void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
/*
 * NOTE(review): vnodeops_t declares no vop_frlock member, so any use of
 * VOP_FRLOCK cannot compile -- this looks like a dead macro left over
 * from IRIX; verify no caller exists and consider removing it.
 */
#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)				\
	rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
#define VOP_RECLAIM(vp, rv)						\
	rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)		\
	rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
#define	VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)		\
	rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
#define	VOP_ATTR_REMOVE(vp, name, flags, cred, rv)			\
	rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
#define	VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)		\
	rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
#define VOP_LINK_REMOVED(vp, dvp, linkzero)				\
	(void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
#define VOP_VNODE_CHANGE(vp, cmd, val)					\
	(void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
/*
 * These are page cache functions that now go thru VOPs.
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_TOSS_PAGES(vp, first, last, fiopt)				\
	_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)			\
	_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
/*
 * 'last' parameter is unused and left in for IRIX compatibility
 */
#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)		\
	rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv)			\
	rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
#define VOP_IFLUSH(vp, flags, rv)					\
	rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
378
/*
 * Flags for read/write calls - same values as IRIX
 */
#define IO_ISAIO	0x00001		/* don't wait for completion */
#define IO_ISDIRECT	0x00004		/* bypass page cache */
#define IO_INVIS	0x00020		/* don't update inode timestamps */

/*
 * Flags for VOP_IFLUSH call
 */
#define FLUSH_SYNC		1	/* wait for flush to complete	*/
#define FLUSH_INODE		2	/* flush the inode itself	*/
#define FLUSH_LOG		4	/* force the last log entry for
					 * this inode out to disk	*/

/*
 * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
 *	VOP_FLUSH_PAGES.
 */
#define FI_NONE			0	/* none */
#define FI_REMAPF		1	/* Do a remapf prior to the operation */
#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
					   Prevent VM access to the pages until
					   the operation completes. */

/*
 * Vnode attributes.  va_mask indicates those attributes the caller
 * wants to set (setattr) or extract (getattr); see the XFS_AT_*
 * bits below.
 */
typedef struct vattr {
	int		va_mask;	/* bit-mask of attributes present */
	enum vtype	va_type;	/* vnode type (for create) */
	mode_t		va_mode;	/* file access mode and type */
	nlink_t		va_nlink;	/* number of references to file */
	uid_t		va_uid;		/* owner user id */
	gid_t		va_gid;		/* owner group id */
	xfs_ino_t	va_nodeid;	/* file id */
	xfs_off_t	va_size;	/* file size in bytes */
	u_long		va_blocksize;	/* blocksize preferred for i/o */
	struct timespec	va_atime;	/* time of last access */
	struct timespec	va_mtime;	/* time of last modification */
	struct timespec	va_ctime;	/* time file changed */
	u_int		va_gen;		/* generation number of file */
	xfs_dev_t	va_rdev;	/* device the special file represents */
	__int64_t	va_nblocks;	/* number of blocks allocated */
	u_long		va_xflags;	/* random extended file flags */
	u_long		va_extsize;	/* file extent size */
	u_long		va_nextents;	/* number of extents in file */
	u_long		va_anextents;	/* number of attr extents in file */
	int		va_projid;	/* project id */
} vattr_t;
430
/*
 * setattr or getattr attributes: one bit per vattr field, OR'd into
 * va_mask to say which fields are valid/requested.
 */
#define	XFS_AT_TYPE		0x00000001
#define	XFS_AT_MODE		0x00000002
#define	XFS_AT_UID		0x00000004
#define	XFS_AT_GID		0x00000008
#define	XFS_AT_FSID		0x00000010
#define	XFS_AT_NODEID		0x00000020
#define	XFS_AT_NLINK		0x00000040
#define	XFS_AT_SIZE		0x00000080
#define	XFS_AT_ATIME		0x00000100
#define	XFS_AT_MTIME		0x00000200
#define	XFS_AT_CTIME		0x00000400
#define	XFS_AT_RDEV		0x00000800
#define XFS_AT_BLKSIZE		0x00001000
#define XFS_AT_NBLOCKS		0x00002000
#define XFS_AT_VCODE		0x00004000
#define XFS_AT_MAC		0x00008000
#define XFS_AT_UPDATIME		0x00010000
#define XFS_AT_UPDMTIME		0x00020000
#define XFS_AT_UPDCTIME		0x00040000
#define XFS_AT_ACL		0x00080000
#define XFS_AT_CAP		0x00100000
#define XFS_AT_INF		0x00200000
#define XFS_AT_XFLAGS		0x00400000
#define XFS_AT_EXTSIZE		0x00800000
#define XFS_AT_NEXTENTS		0x01000000
#define XFS_AT_ANEXTENTS	0x02000000
#define XFS_AT_PROJID		0x04000000
#define XFS_AT_SIZE_NOPERM	0x08000000
#define XFS_AT_GENCOUNT		0x10000000

/* Every attribute bit (excluding the UPD* and NOPERM pseudo-bits). */
#define XFS_AT_ALL	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
		XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)

/* The attributes needed to satisfy a stat(2)-style request. */
#define XFS_AT_STAT	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)

#define XFS_AT_TIMES	(XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)

#define XFS_AT_UPDTIMES	(XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)

/* Attributes that may never be set directly by a caller. */
#define XFS_AT_NOSET	(XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
		XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)

/*
 * Modes.
 */
#define VSUID	S_ISUID		/* set user id on execution */
#define VSGID	S_ISGID		/* set group id on execution */
#define VSVTX	S_ISVTX		/* save swapped text even after use */
#define VREAD	S_IRUSR		/* read, write, execute permissions */
#define VWRITE	S_IWUSR
#define VEXEC	S_IXUSR

#define MODEMASK S_IALLUGO	/* mode bits plus permission bits */

/*
 * Check whether mandatory file locking is enabled.
 * (setgid bit set but group-execute bit clear on a regular file.)
 */
#define MANDLOCK(vp, mode)	\
	((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
501
extern void	vn_init(void);			/* one-time subsystem init */
extern int	vn_wait(struct vnode *);	/* wait for VINACT/VRECLM to clear */
extern vnode_t	*vn_initialize(struct inode *);	/* set up the vnode in a new inode */

/*
 * Acquiring and invalidating vnodes:
 *
 *	if (vn_get(vp, version, 0))
 *		...;
 *	vn_purge(vp, version);
 *
 * vn_get and vn_purge must be called with vmap_t arguments, sampled
 * while a lock that the vnode's VOP_RECLAIM function acquires is
 * held, to ensure that the vnode sampled with the lock held isn't
 * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
 * and the subsequent vn_get or vn_purge.
 */
519
520/*
521 * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
522 */
523typedef struct vnode_map {
524 vfs_t *v_vfsp;
525 vnumber_t v_number; /* in-core vnode number */
526 xfs_ino_t v_ino; /* inode # */
527} vmap_t;
528
529#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \
530 (vmap).v_number = (vp)->v_number, \
531 (vmap).v_ino = (vp)->v_inode.i_ino; }
532
extern void	vn_purge(struct vnode *, vmap_t *);
extern vnode_t	*vn_get(struct vnode *, vmap_t *);
extern int	vn_revalidate(struct vnode *);
extern void	vn_revalidate_core(struct vnode *, vattr_t *);
extern void	vn_remove(struct vnode *);

/* Current reference count, read from the embedded Linux inode's i_count. */
static inline int vn_count(struct vnode *vp)
{
	return atomic_read(&LINVFS_GET_IP(vp)->i_count);
}

/*
 * Vnode reference counting functions (and macros for compatibility).
 * Note the asymmetry: VN_HOLD goes through vn_hold(), while VN_RELE
 * drops straight to iput() on the embedded Linux inode.
 */
extern vnode_t	*vn_hold(struct vnode *);
extern void	vn_rele(struct vnode *);

#if defined(XFS_VNODE_TRACE)
/* Tracing builds record file/line of every hold/release. */
#define VN_HOLD(vp)		\
	((void)vn_hold(vp),	\
	  vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
#define VN_RELE(vp)		\
	  (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
	   iput(LINVFS_GET_IP(vp)))
#else
#define VN_HOLD(vp)		((void)vn_hold(vp))
#define VN_RELE(vp)		(iput(LINVFS_GET_IP(vp)))
#endif
561
/*
 * Vname handling macros: a Linux dentry serves as the name argument.
 */
#define VNAME(dentry)		((char *) (dentry)->d_name.name)
#define VNAMELEN(dentry)	((dentry)->d_name.len)
#define VNAME_TO_VNODE(dentry)	(LINVFS_GET_VP((dentry)->d_inode))

/*
 * Vnode spinlock manipulation.
 */
#define VN_LOCK(vp)		mutex_spinlock(&(vp)->v_lock)
#define VN_UNLOCK(vp, s)	mutex_spinunlock(&(vp)->v_lock, s)
#define VN_FLAGSET(vp,b)	vn_flagset(vp,b)
#define VN_FLAGCLR(vp,b)	vn_flagclr(vp,b)

/*
 * Set flag bit(s) in v_flag under v_lock.
 * NOTE(review): these use plain spin_lock while VN_LOCK uses
 * mutex_spinlock on the same lock -- presumably equivalent wrappers;
 * confirm against the definitions in spin.h.
 */
static __inline__ void vn_flagset(struct vnode *vp, uint flag)
{
	spin_lock(&vp->v_lock);
	vp->v_flag |= flag;
	spin_unlock(&vp->v_lock);
}

/* Clear flag bit(s) in v_flag under v_lock. */
static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
{
	spin_lock(&vp->v_lock);
	vp->v_flag &= ~flag;
	spin_unlock(&vp->v_lock);
}
590
/*
 * Update modify/access/change times on the vnode; these write straight
 * into the embedded Linux inode's timestamp fields.
 */
#define VN_MTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_mtime = *(tvp))
#define VN_ATIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_atime = *(tvp))
#define VN_CTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_ctime = *(tvp))

/*
 * Dealing with bad inodes
 */
/* Mark the underlying Linux inode bad (all further ops will fail). */
static inline void vn_mark_bad(struct vnode *vp)
{
	make_bad_inode(LINVFS_GET_IP(vp));
}

/* Nonzero if the underlying Linux inode has been marked bad. */
static inline int VN_BAD(struct vnode *vp)
{
	return is_bad_inode(LINVFS_GET_IP(vp));
}

/*
 * Some useful predicates, all answered from the inode's address space.
 */
#define VN_MAPPED(vp)	mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
#define VN_CACHED(vp)	(LINVFS_GET_IP(vp)->i_mapping->nrpages)
#define VN_DIRTY(vp)	mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
					PAGECACHE_TAG_DIRTY)
#define VMODIFY(vp)	VN_FLAGSET(vp, VMODIFIED)
#define VUNMODIFY(vp)	VN_FLAGCLR(vp, VMODIFIED)

/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
 */
#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
#define	ATTR_DMI	0x08	/* invocation from a DMI function */
#define	ATTR_LAZY	0x80	/* set/get attributes lazily */
#define	ATTR_NONBLOCK	0x100	/* return EAGAIN if operation would block */

/*
 * Flags to VOP_FSYNC and VOP_RECLAIM.
 */
#define FSYNC_NOWAIT	0	/* asynchronous flush */
#define FSYNC_WAIT	0x1	/* synchronous fsync or forced reclaim */
#define FSYNC_INVAL	0x2	/* flush and invalidate cached data */
#define FSYNC_DATA	0x4	/* synchronous fsync of data only */
636
/*
 * Tracking vnode activity.  Implemented in xfs_vnode.c when
 * XFS_VNODE_TRACE is defined; otherwise all hooks compile away.
 */
#if defined(XFS_VNODE_TRACE)

#define	VNODE_TRACE_SIZE	16		/* number of trace entries */
#define	VNODE_KTRACE_ENTRY	1
#define	VNODE_KTRACE_EXIT	2
#define	VNODE_KTRACE_HOLD	3
#define	VNODE_KTRACE_REF	4
#define	VNODE_KTRACE_RELE	5

extern void vn_trace_entry(struct vnode *, char *, inst_t *);
extern void vn_trace_exit(struct vnode *, char *, inst_t *);
extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);

#define VN_TRACE(vp)		\
	vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
#else
/* Tracing disabled: hooks expand to nothing. */
#define vn_trace_entry(a,b,c)
#define vn_trace_exit(a,b,c)
#define vn_trace_hold(a,b,c,d)
#define vn_trace_ref(a,b,c,d)
#define vn_trace_rele(a,b,c,d)
#define VN_TRACE(vp)
#endif

#endif	/* __XFS_VNODE_H__ */