349 files changed, 9845 insertions, 7189 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index e847f504a47c..1a6d08761f39 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -1,8 +1,9 @@
 /*
 *  linux/fs/9p/9p.c
 *
- *  This file contains functions 9P2000 functions
+ *  This file contains functions to perform synchronous 9P calls
 *
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
 *
@@ -33,6 +34,7 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
+#include "conv.h"
 #include "mux.h"
 /**
@@ -46,16 +48,21 @@
 int
 v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-               char *version, struct v9fs_fcall **fcall)
+               char *version, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
        dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-        msg.id = TVERSION;
+        tc = v9fs_create_tversion(msize, version);
-        msg.params.tversion.msize = msize;
-        msg.params.tversion.version = version;
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        if (!IS_ERR(tc)) {
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
+        return ret;
 }
 /**
@@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 int
 v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-              u32 fid, u32 afid, struct v9fs_fcall **fcall)
+              u32 fid, u32 afid, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall* tc;
        dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
                aname, fid, afid);
-        msg.id = TATTACH;
-        msg.params.tattach.fid = fid;
-        msg.params.tattach.afid = afid;
-        msg.params.tattach.uname = uname;
-        msg.params.tattach.aname = aname;
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        tc = v9fs_create_tattach(fid, afid, uname, aname);
+        if (!IS_ERR(tc)) {
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
+        return ret;
+}
+/**
+ * v9fs_t_clunk_cb - finish a clunk request and release its messages
+ * @a: session information (struct v9fs_session_info *)
+ * @tc: the Tclunk request; an ERR_PTR value if it could not be created
+ * @rc: the reply, or NULL if there is none
+ * @err: non-zero if the request failed
+ *
+ * The fid goes back to the session's fid pool only when the reply is
+ * an Rclunk. Both messages are freed here on every path.
+ */
+static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
+        struct v9fs_fcall *rc, int err)
+{
+        int fid, tcid;
+        struct v9fs_session_info *v9ses;
+        if (err) {
+                /* tc holds an error code, not memory, if creation failed */
+                if (!IS_ERR(tc))
+                        kfree(tc);
+                kfree(rc);
+                return;
+        }
+        /* save what is still needed from tc before it is freed */
+        fid = tc->params.tclunk.fid;
+        tcid = tc->id;
+        kfree(tc);
+        if (!rc)
+                return;
+        dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tcid, rc->id);
+        v9ses = a;
+        if (rc->id == RCLUNK)
+                v9fs_put_idpool(fid, &v9ses->fidpool);
+        kfree(rc);
 }
 /**
@@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
 */
 int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
-             struct v9fs_fcall **fcall)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc, *rc;
        dprintk(DEBUG_9P, "fid %d\n", fid);
-        msg.id = TCLUNK;
-        msg.params.tclunk.fid = fid;
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        rc = NULL;
+        tc = v9fs_create_tclunk(fid);
+        if (!IS_ERR(tc))
+                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+        else
+                ret = PTR_ERR(tc);
+        if (ret)
+                dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
+        v9fs_t_clunk_cb(v9ses, tc, rc, ret);
+        return ret;
 }
 /**
@@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
 *
 */
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
+        dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
+        tc = v9fs_create_tflush(oldtag);
+        if (!IS_ERR(tc)) {
+                ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
-        dprintk(DEBUG_9P, "oldtag %d\n", tag);
+        return ret;
-        msg.id = TFLUSH;
-        msg.params.tflush.oldtag = tag;
-        return v9fs_mux_rpc(v9ses, &msg, NULL);
 }
 /**
@@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
 */
 int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
         dprintk(DEBUG_9P, "fid %d\n", fid);
-        if (fcall)
-                *fcall = NULL;
-        msg.id = TSTAT;
-        msg.params.tstat.fid = fid;
+        tc = v9fs_create_tstat(fid);
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        if (!IS_ERR(tc)) {
+                /* the request is ours to free once the rpc has returned */
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
+        return ret;
 }
 /**
@@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
 int
 v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-             struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+             struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
+        dprintk(DEBUG_9P, "fid %d\n", fid);
-        dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
+        tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
-        msg.id = TWSTAT;
+        if (!IS_ERR(tc)) {
-        msg.params.twstat.fid = fid;
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-        msg.params.twstat.stat = stat;
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        return ret;
 }
 /**
@@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
 int
 v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-            char *name, struct v9fs_fcall **fcall)
+            char *name, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
+        int nwname;
        dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-        msg.id = TWALK;
-        msg.params.twalk.fid = fid;
+        if (name)
-        msg.params.twalk.newfid = newfid;
+                nwname = 1;
+        else
-        if (name) {
+                nwname = 0;
-                msg.params.twalk.nwname = 1;
-                msg.params.twalk.wnames = &name;
+        tc = v9fs_create_twalk(fid, newfid, nwname, &name);
-        } else {
+        if (!IS_ERR(tc)) {
-                msg.params.twalk.nwname = 0;
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-        }
+                kfree(tc);
+        } else
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+                ret = PTR_ERR(tc);
+        return ret;
 }
 /**
@@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
 int
 v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-            struct v9fs_fcall **fcall)
+            struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
-        long errorno = -1;
+        struct v9fs_fcall *tc;
        dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-        msg.id = TOPEN;
-        msg.params.topen.fid = fid;
-        msg.params.topen.mode = mode;
-        errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
+        tc = v9fs_create_topen(fid, mode);
+        if (!IS_ERR(tc)) {
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
-        return errorno;
+        return ret;
 }
 /**
@@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
 int
 v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-              struct v9fs_fcall **fcall)
+              struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
        dprintk(DEBUG_9P, "fid %d\n", fid);
-        msg.id = TREMOVE;
-        msg.params.tremove.fid = fid;
+        tc = v9fs_create_tremove(fid);
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        if (!IS_ERR(tc)) {
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+                kfree(tc);
+        } else
+                ret = PTR_ERR(tc);
+        return ret;
 }
 /**
@@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
 int
 v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-              u32 perm, u8 mode, struct v9fs_fcall **fcall)
+              u32 perm, u8 mode, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
+        struct v9fs_fcall *tc;
        dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
                fid, name, perm, mode);
-        msg.id = TCREATE;
+        tc = v9fs_create_tcreate(fid, name, perm, mode);
-        msg.params.tcreate.fid = fid;
+        if (!IS_ERR(tc)) {
-        msg.params.tcreate.name = name;
+                ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-        msg.params.tcreate.perm = perm;
+                kfree(tc);
-        msg.params.tcreate.mode = mode;
+        } else
+                ret = PTR_ERR(tc);
-        return v9fs_mux_rpc(v9ses, &msg, fcall);
+        return ret;
 }
 /**
@@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
 int
 v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-            u32 count, struct v9fs_fcall **fcall)
+            u32 count, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
-        struct v9fs_fcall *rc = NULL;
+        /* rc starts as NULL so it is well defined even if no reply is stored */
+        struct v9fs_fcall *tc, *rc = NULL;
-        long errorno = -1;
+        dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
-        dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
+                (long long unsigned) offset, count);
-                (long unsigned int)offset, count);
-        msg.id = TREAD;
+        tc = v9fs_create_tread(fid, offset, count);
-        msg.params.tread.fid = fid;
+        if (!IS_ERR(tc)) {
-        msg.params.tread.offset = offset;
+                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-        msg.params.tread.count = count;
+                /* on success the return value is the reply's byte count */
+                if (!ret)
-        errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+                        ret = rc->params.rread.count;
+                /* hand the reply to the caller if wanted, else drop it */
+                if (rcp)
-        if (!errorno) {
+                        *rcp = rc;
-                errorno = rc->params.rread.count;
+                else
-                dump_data(rc->params.rread.data, rc->params.rread.count);
+                        kfree(rc);
-        }
+                kfree(tc);
-        if (fcall)
+        } else
-                *fcall = rc;
+                ret = PTR_ERR(tc);
-        else
-                kfree(rc);
+        return ret;
-        return errorno;
 }
 /**
@@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
 */
 int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
-             u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+        const char __user *data, struct v9fs_fcall **rcp)
 {
-        struct v9fs_fcall msg;
+        int ret;
-        struct v9fs_fcall *rc = NULL;
+        /* rc starts as NULL so it is well defined even if no reply is stored */
+        struct v9fs_fcall *tc, *rc = NULL;
-        long errorno = -1;
-        dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+        dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
-                (unsigned long long)offset, count);
+                (long long unsigned) offset, count);
-        dump_data(data, count);
-        msg.id = TWRITE;
+        tc = v9fs_create_twrite(fid, offset, count, data);
-        msg.params.twrite.fid = fid;
+        if (!IS_ERR(tc)) {
-        msg.params.twrite.offset = offset;
+                ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-        msg.params.twrite.count = count;
-        msg.params.twrite.data = data;
-        errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+                /* on success the return value is the reply's byte count */
+                if (!ret)
+                        ret = rc->params.rwrite.count;
+                /* hand the reply to the caller if wanted, else drop it */
+                if (rcp)
+                        *rcp = rc;
+                else
+                        kfree(rc);
-        if (!errorno)
+                kfree(tc);
-                errorno = rc->params.rwrite.count;
+        } else
+                ret = PTR_ERR(tc);
-        if (fcall)
+        return ret;
-                *fcall = rc;
-        else
-                kfree(rc);
-        return errorno;
 }
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index f55424216be2..0cd374d94717 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -3,6 +3,7 @@
 *
 * 9P protocol definitions.
 *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
 *
@@ -100,9 +101,18 @@ enum {
        V9FS_QTFILE = 0x00,
 };
+#define V9FS_NOTAG      (u16)(~0)
+#define V9FS_NOFID      (u32)(~0)
+#define V9FS_MAXWELEM   16
 /* ample room for Twrite/Rread header (iounit) */
 #define V9FS_IOHDRSZ    24
+struct v9fs_str {
+        u16 len;
+        char *str;
+};
 /* qids are the unique ID for a file (like an inode */
 struct v9fs_qid {
        u8 type;
@@ -120,6 +130,29 @@ struct v9fs_stat {
        u32 atime;
        u32 mtime;
        u64 length;
+        struct v9fs_str name;
+        struct v9fs_str uid;
+        struct v9fs_str gid;
+        struct v9fs_str muid;
+        struct v9fs_str extension;      /* 9p2000.u extensions */
+        u32 n_uid;              /* 9p2000.u extensions */
+        u32 n_gid;              /* 9p2000.u extensions */
+        u32 n_muid;             /* 9p2000.u extensions */
+};
+/* file metadata (stat) structure used to create Twstat message
+   This is similar to v9fs_stat, but the strings don't point to
+   the same memory block and should be freed separately
+*/
+struct v9fs_wstat {
+        u16 size;
+        u16 type;
+        u32 dev;
+        struct v9fs_qid qid;
+        u32 mode;
+        u32 atime;
+        u32 mtime;
+        u64 length;
        char *name;
        char *uid;
        char *gid;
@@ -128,25 +161,24 @@ struct v9fs_stat {
        u32 n_uid;              /* 9p2000.u extensions */
        u32 n_gid;              /* 9p2000.u extensions */
        u32 n_muid;             /* 9p2000.u extensions */
-        char data[0];
 };
 /* Structures for Protocol Operations */
 struct Tversion {
        u32 msize;
-        char *version;
+        struct v9fs_str version;
 };
 struct Rversion {
        u32 msize;
-        char *version;
+        struct v9fs_str version;
 };
 struct Tauth {
        u32 afid;
-        char *uname;
+        struct v9fs_str uname;
-        char *aname;
+        struct v9fs_str aname;
 };
 struct Rauth {
@@ -154,12 +186,12 @@ struct Rauth {
 };
 struct Rerror {
-        char *error;
+        struct v9fs_str error;
        u32 errno;              /* 9p2000.u extension */
 };
 struct Tflush {
-        u32 oldtag;
+        u16 oldtag;
 };
 struct Rflush {
@@ -168,8 +200,8 @@ struct Rflush {
 struct Tattach {
        u32 fid;
        u32 afid;
-        char *uname;
+        struct v9fs_str uname;
-        char *aname;
+        struct v9fs_str aname;
 };
 struct Rattach {
@@ -179,13 +211,13 @@ struct Rattach {
 struct Twalk {
         u32 fid;
         u32 newfid;
-        u32 nwname;
+        u16 nwname;
-        char **wnames;
+        /* sized by the named limit instead of a bare 16 */
+        struct v9fs_str wnames[V9FS_MAXWELEM];
 };
 struct Rwalk {
-        u32 nwqid;
+        u16 nwqid;
-        struct v9fs_qid *wqids;
+        /* sized by the named limit instead of a bare 16 */
+        struct v9fs_qid wqids[V9FS_MAXWELEM];
 };
 struct Topen {
@@ -200,7 +232,7 @@ struct Ropen {
 struct Tcreate {
        u32 fid;
-        char *name;
+        struct v9fs_str name;
        u32 perm;
        u8 mode;
 };
@@ -251,12 +283,12 @@ struct Tstat {
 };
 struct Rstat {
-        struct v9fs_stat *stat;
+        struct v9fs_stat stat;
 };
 struct Twstat {
        u32 fid;
-        struct v9fs_stat *stat;
+        struct v9fs_stat stat;
 };
 struct Rwstat {
@@ -271,6 +303,7 @@ struct v9fs_fcall {
        u32 size;
        u8 id;
        u16 tag;
+        void *sdata;
        union {
                struct Tversion tversion;
@@ -303,7 +336,9 @@ struct v9fs_fcall {
        } params;
 };
-#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
+        fcall?fcall->params.rerror.error.len:0, \
+        fcall?fcall->params.rerror.error.str:"");
 int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
                   char *version, struct v9fs_fcall **rcall);
@@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
                  u32 fid, u32 afid, struct v9fs_fcall **rcall);
-int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
-                 struct v9fs_fcall **rcall);
 int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
@@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
                struct v9fs_fcall **rcall);
 int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-                 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+                 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
 int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
                char *name, struct v9fs_fcall **rcall);
@@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
                u64 offset, u32 count, struct v9fs_fcall **rcall);
 int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-                 u32 count, void *data, struct v9fs_fcall **rcall);
+                 u32 count, const char __user * data,
+                 struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index e4e4ffe5a7dc..2f4ce43f7b6c 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,17 +1,18 @@
 obj-$(CONFIG_9P_FS) := 9p2000.o
 9p2000-objs := \
+        trans_fd.o \
+        trans_sock.o \
+        mux.o \
+        9p.o \
+        conv.o \
        vfs_super.o \
        vfs_inode.o \
+        vfs_addr.o \
        vfs_file.o \
        vfs_dir.o \
        vfs_dentry.o \
        error.o \
-        mux.o \
-        trans_fd.o \
-        trans_sock.o \
-        9p.o \
-        conv.o \
        v9fs.o \
        fid.o
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 18121af99d3e..32a9f99154e2 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -30,7 +30,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
+#include <asm/uaccess.h>
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
@@ -56,20 +56,23 @@ static inline int buf_check_overflow(struct cbuf *buf)
        return buf->p > buf->ep;
 }
-static inline int buf_check_size(struct cbuf *buf, int len)
+static int buf_check_size(struct cbuf *buf, int len)
 {
-        if (buf->p+len > buf->ep) {
+        if (buf->p + len > buf->ep) {
                if (buf->p < buf->ep) {
-                        eprintk(KERN_ERR, "buffer overflow\n");
+                        eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
+                                len, (int)(buf->ep - buf->p));
+                        dump_stack();
                        buf->p = buf->ep + 1;
-                        return 0;
                }
+                return 0;
        }
        return 1;
 }
-static inline void *buf_alloc(struct cbuf *buf, int len)
+static void *buf_alloc(struct cbuf *buf, int len)
 {
        void *ret = NULL;
@@ -81,7 +84,7 @@ static inline void *buf_alloc(struct cbuf *buf, int len)
        return ret;
 }
-static inline void buf_put_int8(struct cbuf *buf, u8 val)
+static void buf_put_int8(struct cbuf *buf, u8 val)
 {
        if (buf_check_size(buf, 1)) {
                buf->p[0] = val;
@@ -89,7 +92,7 @@ static inline void buf_put_int8(struct cbuf *buf, u8 val)
        }
 }
-static inline void buf_put_int16(struct cbuf *buf, u16 val)
+static void buf_put_int16(struct cbuf *buf, u16 val)
 {
        if (buf_check_size(buf, 2)) {
                *(__le16 *) buf->p = cpu_to_le16(val);
@@ -97,7 +100,7 @@ static inline void buf_put_int16(struct cbuf *buf, u16 val)
        }
 }
-static inline void buf_put_int32(struct cbuf *buf, u32 val)
+static void buf_put_int32(struct cbuf *buf, u32 val)
 {
        if (buf_check_size(buf, 4)) {
                *(__le32 *)buf->p = cpu_to_le32(val);
@@ -105,7 +108,7 @@ static inline void buf_put_int32(struct cbuf *buf, u32 val)
        }
 }
-static inline void buf_put_int64(struct cbuf *buf, u64 val)
+static void buf_put_int64(struct cbuf *buf, u64 val)
 {
        if (buf_check_size(buf, 8)) {
                *(__le64 *)buf->p = cpu_to_le64(val);
@@ -113,7 +116,7 @@ static inline void buf_put_int64(struct cbuf *buf, u64 val)
        }
 }
-static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
+static void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
 {
        if (buf_check_size(buf, slen + 2)) {
                buf_put_int16(buf, slen);
@@ -127,15 +130,7 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
        buf_put_stringn(buf, s, strlen(s));
 }
-static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
+static u8 buf_get_int8(struct cbuf *buf)
-{
-        if (buf_check_size(buf, datalen)) {
-                memcpy(buf->p, data, datalen);
-                buf->p += datalen;
-        }
-}
-static inline u8 buf_get_int8(struct cbuf *buf)
 {
        u8 ret = 0;
@@ -147,7 +142,7 @@ static inline u8 buf_get_int8(struct cbuf *buf)
        return ret;
 }
-static inline u16 buf_get_int16(struct cbuf *buf)
+static u16 buf_get_int16(struct cbuf *buf)
 {
        u16 ret = 0;
@@ -159,7 +154,7 @@ static inline u16 buf_get_int16(struct cbuf *buf)
        return ret;
 }
-static inline u32 buf_get_int32(struct cbuf *buf)
+static u32 buf_get_int32(struct cbuf *buf)
 {
        u32 ret = 0;
@@ -171,7 +166,7 @@ static inline u32 buf_get_int32(struct cbuf *buf)
        return ret;
 }
-static inline u64 buf_get_int64(struct cbuf *buf)
+static u64 buf_get_int64(struct cbuf *buf)
 {
        u64 ret = 0;
@@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf)
        return ret;
 }
-static inline int
+static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
-buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
-{
-        u16 len = 0;
-        len = buf_get_int16(buf);
-        if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
-                memcpy(data, buf->p, len);
-                data[len] = 0;
-                buf->p += len;
-                len++;
-        }
-        return len;
-}
-static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
-{
-        char *ret;
-        u16 len;
-        ret = NULL;
-        len = buf_get_int16(buf);
-        if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
-                buf_check_size(sbuf, len+1)) {
-                memcpy(sbuf->p, buf->p, len);
-                sbuf->p[len] = 0;
-                ret = sbuf->p;
-                buf->p += len;
-                sbuf->p += len + 1;
-        }
-        return ret;
-}
-static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
 {
-        int ret = 0;
+        vstr->len = buf_get_int16(buf);
+        if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-        if (buf_check_size(buf, datalen)) {
+                vstr->str = buf->p;
-                memcpy(data, buf->p, datalen);
+                buf->p += vstr->len;
-                buf->p += datalen;
+        } else {
-                ret = datalen;
+                vstr->len = 0;
+                vstr->str = NULL;
        }
-        return ret;
 }
-static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
+static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
-                                  int datalen)
 {
-        char *ret = NULL;
+        qid->type = buf_get_int8(bufp);
-        int n = 0;
+        qid->version = buf_get_int32(bufp);
+        qid->path = buf_get_int64(bufp);
-        if (buf_check_size(dbuf, datalen)) {
-                n = buf_get_data(buf, dbuf->p, datalen);
-                if (n > 0) {
-                        ret = dbuf->p;
-                        dbuf->p += n;
-                }
-        }
-        return ret;
 }
 /**
- * v9fs_size_stat - calculate the size of a variable length stat struct
+ * v9fs_size_wstat - calculate the size of a variable length stat struct
- * @v9ses: session information
 * @stat: metadata (stat) structure
+ * @extended: non-zero if 9P2000.u
 *
 */
-static int v9fs_size_stat(struct v9fs_session_info *v9ses,
+static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
-                          struct v9fs_stat *stat)
 {
        int size = 0;
-        if (stat == NULL) {
+        if (wstat == NULL) {
                eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
                return 0;
        }
@@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses,
            8 +                 /* length[8] */
            8;                  /* minimum sum of string lengths */
-        if (stat->name)
+        if (wstat->name)
-                size += strlen(stat->name);
+                size += strlen(wstat->name);
-        if (stat->uid)
+        if (wstat->uid)
-                size += strlen(stat->uid);
+                size += strlen(wstat->uid);
-        if (stat->gid)
+        if (wstat->gid)
-                size += strlen(stat->gid);
+                size += strlen(wstat->gid);
-        if (stat->muid)
+        if (wstat->muid)
-                size += strlen(stat->muid);
+                size += strlen(wstat->muid);
-        if (v9ses->extended) {
+        if (extended) {
                size += 4 +     /* n_uid[4] */
                    4 +         /* n_gid[4] */
                    4 +         /* n_muid[4] */
                    2;          /* string length of extension[4] */
-                if (stat->extension)
+                if (wstat->extension)
-                        size += strlen(stat->extension);
+                        size += strlen(wstat->extension);
        }
        return size;
 }
 /**
- * serialize_stat - safely format a stat structure for transmission
+ * buf_get_stat - safely decode a received metadata (stat) structure
- * @v9ses: session info
- * @stat: metadata (stat) structure
- * @bufp: buffer to serialize structure into
- *
- */
-static int
-serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
-               struct cbuf *bufp)
-{
-        buf_put_int16(bufp, stat->size);
-        buf_put_int16(bufp, stat->type);
-        buf_put_int32(bufp, stat->dev);
-        buf_put_int8(bufp, stat->qid.type);
-        buf_put_int32(bufp, stat->qid.version);
-        buf_put_int64(bufp, stat->qid.path);
-        buf_put_int32(bufp, stat->mode);
-        buf_put_int32(bufp, stat->atime);
-        buf_put_int32(bufp, stat->mtime);
-        buf_put_int64(bufp, stat->length);
-        buf_put_string(bufp, stat->name);
-        buf_put_string(bufp, stat->uid);
-        buf_put_string(bufp, stat->gid);
-        buf_put_string(bufp, stat->muid);
-        if (v9ses->extended) {
-                buf_put_string(bufp, stat->extension);
-                buf_put_int32(bufp, stat->n_uid);
-                buf_put_int32(bufp, stat->n_gid);
-                buf_put_int32(bufp, stat->n_muid);
-        }
-        if (buf_check_overflow(bufp))
-                return 0;
-        return stat->size;
-}
-/**
- * deserialize_stat - safely decode a recieved metadata (stat) structure
- * @v9ses: session info
 * @bufp: buffer to deserialize
 * @stat: metadata (stat) structure
- * @dbufp: buffer to deserialize variable strings into
+ * @extended: non-zero if 9P2000.u
 *
 */
-static inline int
+static void
-deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
+buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
-                 struct v9fs_stat *stat, struct cbuf *dbufp)
 {
        stat->size = buf_get_int16(bufp);
        stat->type = buf_get_int16(bufp);
        stat->dev = buf_get_int32(bufp);
@@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
        stat->atime = buf_get_int32(bufp);
        stat->mtime = buf_get_int32(bufp);
        stat->length = buf_get_int64(bufp);
-        stat->name = buf_get_stringb(bufp, dbufp);
+        buf_get_str(bufp, &stat->name);
-        stat->uid = buf_get_stringb(bufp, dbufp);
+        buf_get_str(bufp, &stat->uid);
-        stat->gid = buf_get_stringb(bufp, dbufp);
+        buf_get_str(bufp, &stat->gid);
-        stat->muid = buf_get_stringb(bufp, dbufp);
+        buf_get_str(bufp, &stat->muid);
-        if (v9ses->extended) {
+        if (extended) {
-                stat->extension = buf_get_stringb(bufp, dbufp);
+                buf_get_str(bufp, &stat->extension);
                stat->n_uid = buf_get_int32(bufp);
                stat->n_gid = buf_get_int32(bufp);
                stat->n_muid = buf_get_int32(bufp);
        }
-        if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
-                return 0;
-        return stat->size + 2;
-}
-/**
- * deserialize_statb - wrapper for decoding a received metadata structure
- * @v9ses: session info
- * @bufp: buffer to deserialize
- * @dbufp: buffer to deserialize variable strings into
- *
- */
-static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
-                                                  *v9ses, struct cbuf *bufp,
-                                                  struct cbuf *dbufp)
-{
-        struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
-        if (ret) {
-                int n = deserialize_stat(v9ses, bufp, ret, dbufp);
-                if (n <= 0)
-                        return NULL;
-        }
-        return ret;
 }
 /**
 * v9fs_deserialize_stat - decode a received metadata structure
- * @v9ses: session info
 * @buf: buffer to deserialize
 * @buflen: length of received buffer
 * @stat: metadata structure to decode into
- * @statlen: length of destination metadata structure
+ * @extended: non-zero if 9P2000.u
 *
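+ * Note: @stat will point into the @buf region.
+ *
+ * Returns the number of bytes consumed from @buf, or 0 if decoding
+ * overflowed the buffer.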
 */
 int
-v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
+v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-                      u32 buflen, struct v9fs_stat *stat, u32 statlen)
+                int extended)
 {
        struct cbuf buffer;
        struct cbuf *bufp = &buffer;
-        struct cbuf dbuffer;
+        unsigned char *p;
-        struct cbuf *dbufp = &dbuffer;
        buf_init(bufp, buf, buflen);
-        buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
+        p = bufp->p;
-                 statlen - sizeof(struct v9fs_stat));
+        buf_get_stat(bufp, stat, extended);
-        return deserialize_stat(v9ses, bufp, stat, dbufp);
-}
-static inline int
-v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
-{
-        int size = 4 + 1 + 2;   /* size[4] msg[1] tag[2] */
-        int i = 0;
-        switch (fcall->id) {
-        default:
-                eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
-                return 0;
-        case TVERSION:          /* msize[4] version[s] */
-                size += 4 + 2 + strlen(fcall->params.tversion.version);
-                break;
-        case TAUTH:             /* afid[4] uname[s] aname[s] */
-                size += 4 + 2 + strlen(fcall->params.tauth.uname) +
-                    2 + strlen(fcall->params.tauth.aname);
-                break;
-        case TFLUSH:            /* oldtag[2] */
-                size += 2;
-                break;
-        case TATTACH:           /* fid[4] afid[4] uname[s] aname[s] */
-                size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
-                    2 + strlen(fcall->params.tattach.aname);
-                break;
-        case TWALK:             /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
-                size += 4 + 4 + 2;
-                /* now compute total for the array of names */
-                for (i = 0; i < fcall->params.twalk.nwname; i++)
-                        size += 2 + strlen(fcall->params.twalk.wnames[i]);
-                break;
-        case TOPEN:             /* fid[4] mode[1] */
-                size += 4 + 1;
-                break;
-        case TCREATE:           /* fid[4] name[s] perm[4] mode[1] */
-                size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
-                break;
-        case TREAD:             /* fid[4] offset[8] count[4] */
-                size += 4 + 8 + 4;
-                break;
-        case TWRITE:            /* fid[4] offset[8] count[4] data[count] */
-                size += 4 + 8 + 4 + fcall->params.twrite.count;
-                break;
-        case TCLUNK:            /* fid[4] */
-                size += 4;
-                break;
-        case TREMOVE:           /* fid[4] */
-                size += 4;
-                break;
-        case TSTAT:             /* fid[4] */
-                size += 4;
-                break;
-        case TWSTAT:            /* fid[4] stat[n] */
-                fcall->params.twstat.stat->size =
-                    v9fs_size_stat(v9ses, fcall->params.twstat.stat);
-                size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
-        }
-        return size;
-}
-/*
- * v9fs_serialize_fcall - marshall fcall struct into a packet
- * @v9ses: session information
- * @fcall: structure to convert
- * @data: buffer to serialize fcall into
- * @datalen: length of buffer to serialize fcall into
- *
- */
-int
-v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
-                     void *data, u32 datalen)
-{
-        int i = 0;
-        struct v9fs_stat *stat = NULL;
-        struct cbuf buffer;
-        struct cbuf *bufp = &buffer;
-        buf_init(bufp, data, datalen);
-        if (!fcall) {
-                eprintk(KERN_ERR, "no fcall\n");
-                return -EINVAL;
-        }
-        fcall->size = v9fs_size_fcall(v9ses, fcall);
-        buf_put_int32(bufp, fcall->size);
-        buf_put_int8(bufp, fcall->id);
-        buf_put_int16(bufp, fcall->tag);
-        dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
-                fcall->tag);
-        /* now encode it */
-        switch (fcall->id) {
-        default:
-                eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
-                return -EPROTO;
-        case TVERSION:
-                buf_put_int32(bufp, fcall->params.tversion.msize);
-                buf_put_string(bufp, fcall->params.tversion.version);
-                break;
-        case TAUTH:
-                buf_put_int32(bufp, fcall->params.tauth.afid);
-                buf_put_string(bufp, fcall->params.tauth.uname);
-                buf_put_string(bufp, fcall->params.tauth.aname);
-                break;
-        case TFLUSH:
-                buf_put_int16(bufp, fcall->params.tflush.oldtag);
-                break;
-        case TATTACH:
-                buf_put_int32(bufp, fcall->params.tattach.fid);
-                buf_put_int32(bufp, fcall->params.tattach.afid);
-                buf_put_string(bufp, fcall->params.tattach.uname);
-                buf_put_string(bufp, fcall->params.tattach.aname);
-                break;
-        case TWALK:
-                buf_put_int32(bufp, fcall->params.twalk.fid);
-                buf_put_int32(bufp, fcall->params.twalk.newfid);
-                buf_put_int16(bufp, fcall->params.twalk.nwname);
-                for (i = 0; i < fcall->params.twalk.nwname; i++)
-                        buf_put_string(bufp, fcall->params.twalk.wnames[i]);
-                break;
-        case TOPEN:
-                buf_put_int32(bufp, fcall->params.topen.fid);
-                buf_put_int8(bufp, fcall->params.topen.mode);
-                break;
-        case TCREATE:
-                buf_put_int32(bufp, fcall->params.tcreate.fid);
-                buf_put_string(bufp, fcall->params.tcreate.name);
-                buf_put_int32(bufp, fcall->params.tcreate.perm);
-                buf_put_int8(bufp, fcall->params.tcreate.mode);
-                break;
-        case TREAD:
-                buf_put_int32(bufp, fcall->params.tread.fid);
-                buf_put_int64(bufp, fcall->params.tread.offset);
-                buf_put_int32(bufp, fcall->params.tread.count);
-                break;
-        case TWRITE:
-                buf_put_int32(bufp, fcall->params.twrite.fid);
-                buf_put_int64(bufp, fcall->params.twrite.offset);
-                buf_put_int32(bufp, fcall->params.twrite.count);
-                buf_put_data(bufp, fcall->params.twrite.data,
-                             fcall->params.twrite.count);
-                break;
-        case TCLUNK:
-                buf_put_int32(bufp, fcall->params.tclunk.fid);
-                break;
-        case TREMOVE:
-                buf_put_int32(bufp, fcall->params.tremove.fid);
-                break;
-        case TSTAT:
-                buf_put_int32(bufp, fcall->params.tstat.fid);
-                break;
-        case TWSTAT:
-                buf_put_int32(bufp, fcall->params.twstat.fid);
-                stat = fcall->params.twstat.stat;
-                buf_put_int16(bufp, stat->size + 2);
-                serialize_stat(v9ses, stat, bufp);
-                break;
-        }
        if (buf_check_overflow(bufp))
-                return -EIO;
+                return 0;
+        else
-        return fcall->size;
+                return bufp->p - p;
 }
 /**
 * deserialize_fcall - unmarshal a response
- * @v9ses: session information
- * @msgsize: size of rcall message
 * @buf: recieved buffer
 * @buflen: length of received buffer
 * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
+ * @extended: non-zero if 9P2000.u
 *
 */
 int
-v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
+v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                       void *buf, u32 buflen, struct v9fs_fcall *rcall,
+                       int extended)
-                       int rcalllen)
 {
        struct cbuf buffer;
        struct cbuf *bufp = &buffer;
-        struct cbuf dbuffer;
-        struct cbuf *dbufp = &dbuffer;
        int i = 0;
        buf_init(bufp, buf, buflen);
-        buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
-                 rcalllen - sizeof(struct v9fs_fcall));
-        rcall->size = msgsize;
+        rcall->size = buf_get_int32(bufp);
        rcall->id = buf_get_int8(bufp);
        rcall->tag = buf_get_int16(bufp);
        dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
                rcall->tag);
        switch (rcall->id) {
        default:
                eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
                return -EPROTO;
        case RVERSION:
                rcall->params.rversion.msize = buf_get_int32(bufp);
-                rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+                buf_get_str(bufp, &rcall->params.rversion.version);
                break;
        case RFLUSH:
                break;
@@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
                break;
        case RWALK:
                rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-                rcall->params.rwalk.wqids = buf_alloc(dbufp,
+                if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
-                      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
+                        eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
-                if (rcall->params.rwalk.wqids)
+                                V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
-                        for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
+                        return -EPROTO;
-                                rcall->params.rwalk.wqids[i].type =
+                }
-                                    buf_get_int8(bufp);
-                                rcall->params.rwalk.wqids[i].version =
+                for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-                                    buf_get_int16(bufp);
+                        buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-                                rcall->params.rwalk.wqids[i].path =
-                                    buf_get_int64(bufp);
-                        }
                break;
        case ROPEN:
-                rcall->params.ropen.qid.type = buf_get_int8(bufp);
+                buf_get_qid(bufp, &rcall->params.ropen.qid);
-                rcall->params.ropen.qid.version = buf_get_int32(bufp);
-                rcall->params.ropen.qid.path = buf_get_int64(bufp);
                rcall->params.ropen.iounit = buf_get_int32(bufp);
                break;
        case RCREATE:
-                rcall->params.rcreate.qid.type = buf_get_int8(bufp);
+                buf_get_qid(bufp, &rcall->params.rcreate.qid);
-                rcall->params.rcreate.qid.version = buf_get_int32(bufp);
-                rcall->params.rcreate.qid.path = buf_get_int64(bufp);
                rcall->params.rcreate.iounit = buf_get_int32(bufp);
                break;
        case RREAD:
                rcall->params.rread.count = buf_get_int32(bufp);
-                rcall->params.rread.data = buf_get_datab(bufp, dbufp,
+                rcall->params.rread.data = bufp->p;
-                        rcall->params.rread.count);
+                buf_check_size(bufp, rcall->params.rread.count);
                break;
        case RWRITE:
                rcall->params.rwrite.count = buf_get_int32(bufp);
@@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
                break;
        case RSTAT:
                buf_get_int16(bufp);
-                rcall->params.rstat.stat =
+                buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
-                    deserialize_statb(v9ses, bufp, dbufp);
                break;
        case RWSTAT:
                break;
        case RERROR:
-                rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
+                buf_get_str(bufp, &rcall->params.rerror.error);
-                if (v9ses->extended)
+                if (extended)
                        rcall->params.rerror.errno = buf_get_int16(bufp);
                break;
        }
-        if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+        if (buf_check_overflow(bufp)) {
+                dprintk(DEBUG_ERROR, "buffer overflow\n");
                return -EIO;
+        }
+        return bufp->p - bufp->sp;
+}
+/*
+ * Record an 8-bit value twice: in the in-memory field @p and, through
+ * buf_put_int8(), in the buffer @bufp.
+ */
+static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
+{
+        *p = val;
+        buf_put_int8(bufp, val);
+}
+/*
+ * Record a 16-bit value twice: in the in-memory field @p and, through
+ * buf_put_int16(), in the buffer @bufp.
+ */
+static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
+{
+        *p = val;
+        buf_put_int16(bufp, val);
+}
+/*
+ * Record a 32-bit value twice: in the in-memory field @p and, through
+ * buf_put_int32(), in the buffer @bufp.
+ */
+static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
+{
+        *p = val;
+        buf_put_int32(bufp, val);
+}
+/*
+ * Record a 64-bit value twice: in the in-memory field @p and, through
+ * buf_put_int64(), in the buffer @bufp.
+ */
+static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
+{
+        *p = val;
+        buf_put_int64(bufp, val);
+}
+/*
+ * v9fs_put_str - append a string[s] field to the buffer and describe it
+ * @bufp: buffer being filled
+ * @data: NUL-terminated string, or NULL for an empty string
+ * @str: receives the string length and a pointer to its characters
+ */
+static void
+v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
+{
+        if (data) {
+                str->len = strlen(data);
+                /*
+                 * NOTE(review): a string[s] is sized as "2 + strlen()" by
+                 * the builders in this file, i.e. len[2] followed by the
+                 * characters, so the text starts two bytes past the current
+                 * position rather than at it.  Confirm against
+                 * buf_put_stringn().
+                 */
+                str->str = bufp->p + 2;
+        } else {
+                str->len = 0;
+                str->str = NULL;
+        }
+        buf_put_stringn(bufp, data, str->len);
+}
+/*
+ * v9fs_put_user_data - reserve room in the buffer and fill it from user space
+ * @bufp: buffer being filled
+ * @data: user-space source
+ * @count: number of bytes to copy
+ * @pdata: receives the address of the reserved region
+ *
+ * Returns 0 on success or -EFAULT if the user copy came up short.
+ * copy_from_user() reports the number of bytes it could NOT copy; that
+ * positive count must not reach the caller's ERR_PTR(), so it is mapped to
+ * an errno here.
+ */
+static int
+v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
+                   unsigned char **pdata)
+{
+        *pdata = buf_alloc(bufp, count);
+        /*
+         * NOTE(review): buf_alloc() appears to return NULL when the buffer
+         * is too small.  Skip the copy in that case and leave the report to
+         * the caller's buf_check_overflow() -- confirm that buf_alloc()
+         * flags the overflow.
+         */
+        if (!*pdata)
+                return 0;
+        return copy_from_user(*pdata, data, count) ? -EFAULT : 0;
+}
+/*
+ * v9fs_put_wstat - encode a wstat structure into the buffer
+ * @bufp: buffer being filled
+ * @wstat: values to encode
+ * @stat: in-memory copy, filled in field by field as each value is encoded
+ * @statsz: value written as the leading size field
+ * @extended: non-zero to append the extension, n_uid, n_gid, n_muid fields
+ *
+ * The calls below are issued in the order the fields are laid out in the
+ * buffer, so they must not be reordered.  Overflow is not checked here.
+ */
+static void
+v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
+               struct v9fs_stat *stat, int statsz, int extended)
+{
+        v9fs_put_int16(bufp, statsz, &stat->size);
+        v9fs_put_int16(bufp, wstat->type, &stat->type);
+        v9fs_put_int32(bufp, wstat->dev, &stat->dev);
+        v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
+        v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
+        v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
+        v9fs_put_int32(bufp, wstat->mode, &stat->mode);
+        v9fs_put_int32(bufp, wstat->atime, &stat->atime);
+        v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
+        v9fs_put_int64(bufp, wstat->length, &stat->length);
+        v9fs_put_str(bufp, wstat->name, &stat->name);
+        v9fs_put_str(bufp, wstat->uid, &stat->uid);
+        v9fs_put_str(bufp, wstat->gid, &stat->gid);
+        v9fs_put_str(bufp, wstat->muid, &stat->muid);
+        /* fields below are only encoded when @extended is non-zero */
+        if (extended) {
+                v9fs_put_str(bufp, wstat->extension, &stat->extension);
+                v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
+                v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
+                v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
+        }
+}
+/*
+ * v9fs_create_common - allocate a request and encode its fixed header
+ * @bufp: buffer descriptor to initialise over the new request's data area
+ * @size: size of the message-specific part that the caller will encode
+ * @id: message type
+ *
+ * The structure and its data area come from a single kmalloc(); sdata
+ * points just past the structure.  The header is encoded with V9FS_NOTAG
+ * as the tag.  Returns the request or ERR_PTR(-ENOMEM).
+ */
+static struct v9fs_fcall *
+v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
+{
+        struct v9fs_fcall *msg;
+        /* account for the header: size[4] id[1] tag[2] */
+        size += 4 + 1 + 2;
+        msg = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
+        if (msg == NULL)
+                return ERR_PTR(-ENOMEM);
+        msg->sdata = (char *)msg + sizeof(*msg);
+        buf_init(bufp, (char *)msg->sdata, size);
+        v9fs_put_int32(bufp, size, &msg->size);
+        v9fs_put_int8(bufp, id, &msg->id);
+        v9fs_put_int16(bufp, V9FS_NOTAG, &msg->tag);
+        return msg;
+}
+/**
+ * v9fs_set_tag - set the tag of an already encoded request
+ * @fc: request to update
+ * @tag: new tag
+ *
+ * Updates both the in-memory field and the encoded copy in fc->sdata.  The
+ * encoded tag sits at byte offset 5 (after size[4] id[1]), an odd offset
+ * from the start of the data area, so it is stored one byte at a time in
+ * little-endian order instead of through a 16-bit pointer.
+ */
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
+{
+        u8 *wire = (u8 *)fc->sdata + 5;
+        fc->tag = tag;
+        wire[0] = tag & 0xff;
+        wire[1] = tag >> 8;
+}
+/**
+ * v9fs_create_tversion - build a Tversion request
+ * @msize: msize[4] field
+ * @version: version[s] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: msize[4] version[s] */
+        int payload = 4 + 2 + strlen(version);
+        req = v9fs_create_common(&cb, payload, TVERSION);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, msize, &req->params.tversion.msize);
+        v9fs_put_str(&cb, version, &req->params.tversion.version);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tauth - build a Tauth request
+ * @afid: afid[4] field
+ * @uname: uname[s] field
+ * @aname: aname[s] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: afid[4] uname[s] aname[s] */
+        int payload = 4 + 2 + strlen(uname) + 2 + strlen(aname);
+        req = v9fs_create_common(&cb, payload, TAUTH);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, afid, &req->params.tauth.afid);
+        v9fs_put_str(&cb, uname, &req->params.tauth.uname);
+        v9fs_put_str(&cb, aname, &req->params.tauth.aname);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tattach - build a Tattach request
+ * @fid: fid[4] field
+ * @afid: afid[4] field
+ * @uname: uname[s] field
+ * @aname: aname[s] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *
+v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
+{
+        int size;
+        struct v9fs_fcall *fc;
+        struct cbuf buffer;
+        struct cbuf *bufp = &buffer;
+        size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname);   /* fid[4] afid[4] uname[s] aname[s] */
+        fc = v9fs_create_common(bufp, size, TATTACH);
+        if (IS_ERR(fc))
+                goto error;
+        v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
+        v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
+        v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
+        v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
+        /* same overflow check as every other request builder in this file */
+        if (buf_check_overflow(bufp)) {
+                kfree(fc);
+                fc = ERR_PTR(-ENOMEM);
+        }
+      error:
+        return fc;
+}
+/**
+ * v9fs_create_tflush - build a Tflush request
+ * @oldtag: oldtag[2] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: oldtag[2] */
+        int payload = 2;
+        req = v9fs_create_common(&cb, payload, TFLUSH);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int16(&cb, oldtag, &req->params.tflush.oldtag);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_twalk - build a Twalk request
+ * @fid: fid[4] field
+ * @newfid: newfid[4] field
+ * @nwname: number of entries in @wnames, at most V9FS_MAXWELEM
+ * @wnames: path elements, encoded as nwname*(wname[s])
+ *
+ * Returns the kmalloc'ed request or an ERR_PTR(): -EINVAL if @nwname is too
+ * large, -ENOMEM if allocation failed or the encoded fields overflowed the
+ * message buffer.  NULL is never returned, so IS_ERR() alone is a
+ * sufficient check for callers.
+ */
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+                                     char **wnames)
+{
+        int i, size;
+        struct v9fs_fcall *fc;
+        struct cbuf buffer;
+        struct cbuf *bufp = &buffer;
+        if (nwname > V9FS_MAXWELEM) {
+                dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
+                /* a NULL return would slip past an IS_ERR() check */
+                return ERR_PTR(-EINVAL);
+        }
+        size = 4 + 4 + 2;       /* fid[4] newfid[4] nwname[2] ... */
+        for (i = 0; i < nwname; i++)
+                size += 2 + strlen(wnames[i]);  /* wname[s] */
+        fc = v9fs_create_common(bufp, size, TWALK);
+        if (IS_ERR(fc))
+                goto error;
+        v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
+        v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
+        v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
+        for (i = 0; i < nwname; i++)
+                v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
+        if (buf_check_overflow(bufp)) {
+                kfree(fc);
+                fc = ERR_PTR(-ENOMEM);
+        }
+      error:
+        return fc;
+}
-        return rcall->size;
+/**
+ * v9fs_create_topen - build a Topen request
+ * @fid: fid[4] field
+ * @mode: mode[1] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] mode[1] */
+        int payload = 4 + 1;
+        req = v9fs_create_common(&cb, payload, TOPEN);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.topen.fid);
+        v9fs_put_int8(&cb, mode, &req->params.topen.mode);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tcreate - build a Tcreate request
+ * @fid: fid[4] field
+ * @name: name[s] field
+ * @perm: perm[4] field
+ * @mode: mode[1] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] name[s] perm[4] mode[1] */
+        int payload = 4 + 2 + strlen(name) + 4 + 1;
+        req = v9fs_create_common(&cb, payload, TCREATE);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.tcreate.fid);
+        v9fs_put_str(&cb, name, &req->params.tcreate.name);
+        v9fs_put_int32(&cb, perm, &req->params.tcreate.perm);
+        v9fs_put_int8(&cb, mode, &req->params.tcreate.mode);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tread - build a Tread request
+ * @fid: fid[4] field
+ * @offset: offset[8] field
+ * @count: count[4] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] offset[8] count[4] */
+        int payload = 4 + 8 + 4;
+        req = v9fs_create_common(&cb, payload, TREAD);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.tread.fid);
+        v9fs_put_int64(&cb, offset, &req->params.tread.offset);
+        v9fs_put_int32(&cb, count, &req->params.tread.count);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_twrite - build a Twrite request from user-space data
+ * @fid: fid[4] field
+ * @offset: offset[8] field
+ * @count: count[4] field, also the number of bytes copied from @data
+ * @data: user-space source of the data[count] field
+ *
+ * Returns the kmalloc'ed request or an ERR_PTR(): -EFAULT if the user copy
+ * failed, -ENOMEM if allocation failed or the encoded fields overflowed the
+ * message buffer.
+ */
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+                                      const char __user * data)
+{
+        int size, err;
+        struct v9fs_fcall *fc;
+        struct cbuf buffer;
+        struct cbuf *bufp = &buffer;
+        size = 4 + 8 + 4 + count;       /* fid[4] offset[8] count[4] data[count] */
+        fc = v9fs_create_common(bufp, size, TWRITE);
+        if (IS_ERR(fc))
+                goto error;
+        v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
+        v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
+        v9fs_put_int32(bufp, count, &fc->params.twrite.count);
+        err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
+        /*
+         * A positive value is copy_from_user()'s count of bytes left
+         * uncopied; ERR_PTR() of it would not satisfy IS_ERR() and the
+         * caller would go on to use the freed request.
+         */
+        if (err > 0)
+                err = -EFAULT;
+        else if (!err && buf_check_overflow(bufp))
+                err = -ENOMEM;
+        /* single exit for both failures, so the request is freed only once */
+        if (err) {
+                kfree(fc);
+                fc = ERR_PTR(err);
+        }
+      error:
+        return fc;
+}
+/**
+ * v9fs_create_tclunk - build a Tclunk request
+ * @fid: fid[4] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] */
+        int payload = 4;
+        req = v9fs_create_common(&cb, payload, TCLUNK);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.tclunk.fid);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tremove - build a Tremove request
+ * @fid: fid[4] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tremove(u32 fid)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] */
+        int payload = 4;
+        req = v9fs_create_common(&cb, payload, TREMOVE);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.tremove.fid);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_tstat - build a Tstat request
+ * @fid: fid[4] field
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_tstat(u32 fid)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        /* payload: fid[4] */
+        int payload = 4;
+        req = v9fs_create_common(&cb, payload, TSTAT);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.tstat.fid);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
+}
+/**
+ * v9fs_create_twstat - build a Twstat request
+ * @fid: fid[4] field
+ * @wstat: values for the stat[n] field
+ * @extended: passed through to v9fs_size_wstat() and v9fs_put_wstat()
+ *
+ * Returns the kmalloc'ed request, or ERR_PTR(-ENOMEM) if allocation failed
+ * or the encoded fields overflowed the message buffer.
+ */
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+                                      int extended)
+{
+        struct cbuf cb;
+        struct v9fs_fcall *req;
+        int stat_len = v9fs_size_wstat(wstat, extended);
+        /* payload: fid[4] stat[n] */
+        int payload = 4 + 2 + 2 + stat_len;
+        req = v9fs_create_common(&cb, payload, TWSTAT);
+        if (IS_ERR(req))
+                return req;
+        v9fs_put_int32(&cb, fid, &req->params.twstat.fid);
+        buf_put_int16(&cb, stat_len + 2);
+        v9fs_put_wstat(&cb, wstat, &req->params.twstat.stat, stat_len, extended);
+        if (!buf_check_overflow(&cb))
+                return req;
+        kfree(req);
+        return ERR_PTR(-ENOMEM);
 }
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index ee849613c61a..26a736e4a2e7 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -1,8 +1,9 @@
 /*
 * linux/fs/9p/conv.h
 *
- * 9P protocol conversion definitions
+ * 9P protocol conversion definitions.
 *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
 *
@@ -24,13 +25,27 @@
 *
 */
-int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
+int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-                          u32 buflen, struct v9fs_stat *stat, u32 statlen);
+        int extended);
-int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
+int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                         void *buf, u32 buflen);
+        int extended);
-int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
-                           void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                           int rcalllen);
-/* this one is actually in error.c right now */
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
-int v9fs_errstr2errno(char *errstr);
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
+struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
+        char *aname);
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+        char **wnames);
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+        const char __user *data);
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
+struct v9fs_fcall *v9fs_create_tremove(u32 fid);
+struct v9fs_fcall *v9fs_create_tstat(u32 fid);
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+        int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index 4445f06919d9..fe551032788b 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -51,16 +51,23 @@ do { \
 #if DEBUG_DUMP_PKT
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
 {
-        int i, j;
+        int i, n;
-        int len = datalen;
+        char buf[5*8];
-        printk(KERN_DEBUG "data ");
+        n = 0;
-        for (i = 0; i < len; i += 4) {
+        i = 0;
-                for (j = 0; (j < 4) && (i + j < len); j++)
+        while (i < datalen) {
-                        printk(KERN_DEBUG "%02x", data[i + j]);
+                n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
-                printk(KERN_DEBUG " ");
+                if (i%4 == 0)
+                        n += snprintf(buf+n, sizeof(buf)-n, " ");
+                if (i%16 == 0) {
+                        dprintk(DEBUG_ERROR, "%s\n", buf);
+                        n = 0;
+                }
        }
-        printk(KERN_DEBUG "\n");
+        dprintk(DEBUG_ERROR, "%s\n", buf);
 }
 #else                           /* DEBUG_DUMP_PKT */
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
diff --git a/fs/9p/error.c b/fs/9p/error.c
index 834cb179e388..e4b6f8f38b6f 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -33,7 +33,6 @@
 #include <linux/list.h>
 #include <linux/jhash.h>
-#include <linux/string.h>
 #include "debug.h"
 #include "error.h"
@@ -55,7 +54,8 @@ int v9fs_error_init(void)
        /* load initial error map into hash table */
        for (c = errmap; c->name != NULL; c++) {
-                bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+                c->namelen = strlen(c->name);
+                bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
                INIT_HLIST_NODE(&c->list);
                hlist_add_head(&c->list, &hash_errmap[bucket]);
        }
@@ -69,15 +69,15 @@ int v9fs_error_init(void)
 *
 */
-int v9fs_errstr2errno(char *errstr)
+int v9fs_errstr2errno(char *errstr, int len)
 {
        int errno = 0;
        struct hlist_node *p = NULL;
        struct errormap *c = NULL;
-        int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+        int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
        hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
-                if (!strcmp(c->name, errstr)) {
+                if (c->namelen==len && !memcmp(c->name, errstr, len)) {
                        errno = c->val;
                        break;
                }
diff --git a/fs/9p/error.h b/fs/9p/error.h
index 78f89acf7c9a..a9794e85fe51 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -36,6 +36,7 @@ struct errormap {
        char *name;
        int val;
+        int namelen;
        struct hlist_node list;
 };
@@ -175,4 +176,3 @@ static struct errormap errmap[] = {
 };
 extern int v9fs_error_init(void);
-extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index d95f8626d170..eda449778fa5 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -31,9 +31,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "transport.h"
-#include "mux.h"
-#include "conv.h"
 #include "fid.h"
 /**
@@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
        return v9fs_fid_create(dentry, v9ses, fidnum, 0);
 clunk_fid:
-        v9fs_t_clunk(v9ses, fidnum, NULL);
+        v9fs_t_clunk(v9ses, fidnum);
        return ERR_PTR(err);
 }
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8835b576f744..945cb368d451 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -4,7 +4,7 @@
 * Protocol Multiplexer
 *
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@@ -28,448 +28,943 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/poll.h>
 #include <linux/kthread.h>
 #include <linux/idr.h>
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "transport.h"
 #include "conv.h"
+#include "transport.h"
 #include "mux.h"
+#define ERREQFLUSH      1       /* req->err value for which process_request() skips the callback */
+#define SCHED_TIMEOUT   10      /* poll task sleep between scans; multiplied by HZ where it is used */
+#define MAXPOLLWADDR    2       /* number of poll_wait[]/poll_waddr[] slots per mux */
+/*
+ * Scheduling state kept in v9fs_mux_data.wsched.  In this file the
+ * constants are passed as the bit argument of set_bit(), clear_bit(),
+ * test_and_set_bit() and test_and_clear_bit() on that word.
+ */
+enum {
+        Rworksched = 1,         /* read work scheduled or running */
+        Rpending = 2,           /* can read */
+        Wworksched = 4,         /* write work scheduled or running */
+        Wpending = 8,           /* can write */
+};
+struct v9fs_mux_poll_task;
+/* One outstanding 9P request tracked by a mux. */
+struct v9fs_req {
+        int tag;                        /* compared with the tag of incoming replies */
+        struct v9fs_fcall *tcall;       /* request; its sdata/size are what the write work sends */
+        struct v9fs_fcall *rcall;       /* reply, attached by the read work when the tag matches */
+        int err;                        /* error for this request, handed to the callback */
+        v9fs_mux_req_callback cb;       /* called from process_request(); may be NULL */
+        void *cba;                      /* first argument passed to cb */
+        struct list_head req_list;      /* link on the mux unsent_req_list or req_list */
+};
+/* Per-session multiplexer state, allocated by v9fs_mux_init(). */
+struct v9fs_mux_data {
+        spinlock_t lock;                        /* held while requests move between/leave the lists below */
+        struct list_head mux_list;              /* link on the owning poll task's mux_list */
+        struct v9fs_mux_poll_task *poll_task;   /* poll task this mux was assigned to */
+        int msize;                              /* maximum message size; sizes the receive buffer */
+        unsigned char *extended;                /* pointer to the caller's "extended" flag */
+        struct v9fs_transport *trans;           /* transport providing read/write/poll */
+        struct v9fs_idpool tidpool;             /* id pool set up in v9fs_mux_init(); presumably backs tag allocation */
+        int err;                                /* tested for < 0 before any work is done on the mux */
+        wait_queue_head_t equeue;               /* woken after each processed request; v9fs_mux_destroy() waits on it */
+        struct list_head req_list;              /* requests picked up for sending, awaiting a reply */
+        struct list_head unsent_req_list;       /* requests the write work has not picked up yet */
+        struct v9fs_fcall *rcall;               /* reply being received; allocated together with rbuf */
+        int rpos;                               /* number of bytes currently held in rbuf */
+        char *rbuf;                             /* receive buffer, placed right after *rcall */
+        int wpos;                               /* bytes of wbuf already written */
+        int wsize;                              /* bytes in wbuf; 0 when no send is in progress */
+        char *wbuf;                             /* data being sent (the current request's tcall->sdata) */
+        wait_queue_t poll_wait[MAXPOLLWADDR];   /* wait entries queued by v9fs_pollwait() */
+        wait_queue_head_t *poll_waddr[MAXPOLLWADDR];    /* queue of each entry; NULL = free, ERR_PTR = failed */
+        poll_table pt;                          /* poll table whose callback is v9fs_pollwait() */
+        struct work_struct rq;                  /* read work, runs v9fs_read_work() */
+        struct work_struct wq;                  /* write work, runs v9fs_write_work() */
+        unsigned long wsched;                   /* Rworksched/Rpending/Wworksched/Wpending state */
+};
+/* One slot of the poll task table. */
+struct v9fs_mux_poll_task {
+        struct task_struct *task;       /* polling kthread; NULL marks a free slot */
+        struct list_head mux_list;      /* muxes polled by this task */
+        int muxnum;                     /* number of muxes on mux_list */
+};
+/*
+ * NOTE(review): not referenced in the part of the file reviewed here;
+ * presumably the per-call state of a synchronous RPC - confirm against
+ * the code that uses it.
+ */
+struct v9fs_mux_rpc {
+        struct v9fs_mux_data *m;
+        struct v9fs_req *req;
+        int err;
+        struct v9fs_fcall *rcall;
+        wait_queue_head_t wqueue;
+};
+static int v9fs_poll_proc(void *);
+static void v9fs_read_work(void *);
+static void v9fs_write_work(void *);
+static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+                          poll_table * p);
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
+static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
+static DECLARE_MUTEX(v9fs_mux_task_lock);      /* used by v9fs_mux_poll_start()/v9fs_mux_poll_stop() around the poll task bookkeeping */
+static struct workqueue_struct *v9fs_mux_wq;   /* workqueue on which the read/write works are queued */
+static int v9fs_mux_num;                       /* number of muxes attached to poll tasks */
+static int v9fs_mux_poll_task_num;             /* number of poll tasks currently started */
+static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];     /* poll task slots; task == NULL means free */
+/**
+ * v9fs_mux_global_init - one-time setup of the mux layer
+ *
+ * Marks every poll task slot as free and creates the "v9fs" workqueue.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
+ */
+int v9fs_mux_global_init(void)
+{
+        int i;
+        for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
+                v9fs_mux_poll_tasks[i].task = NULL;
+        v9fs_mux_wq = create_workqueue("v9fs");
+        if (!v9fs_mux_wq)
+                return -ENOMEM;
+        return 0;
+}
+/**
+ * v9fs_mux_global_exit - destroys the workqueue created by
+ *      v9fs_mux_global_init
+ */
+void v9fs_mux_global_exit(void)
+{
+        destroy_workqueue(v9fs_mux_wq);
+}
 /**
- * dprintcond - print condition of session info
+ * v9fs_mux_calc_poll_procs - calculates the number of polling procs
- * @v9ses: session info structure
+ * based on the number of mounted v9fs filesystems.
- * @req: RPC request structure
  *
+ * @muxnum: number of muxes to provide for
+ *
+ * Returns muxnum divided by the current number of poll tasks, rounded up
+ * (1 when no task has been started yet), capped at the size of the poll
+ * task table.  v9fs_mux_poll_start() starts another task only while this
+ * value exceeds the current task count, so the task count grows roughly
+ * with the square root of the number of mounts.
  */
+inline int v9fs_mux_calc_poll_procs(int muxnum)
+{
+        int n;
+        if (v9fs_mux_poll_task_num)
+                n = muxnum / v9fs_mux_poll_task_num +
+                    (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
+        else
+                n = 1;
+        if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
+                n = ARRAY_SIZE(v9fs_mux_poll_tasks);
+        return n;
+}
-static inline int
+/**
+ * v9fs_mux_poll_start - attaches a mux to a polling task
+ * @m: mux to attach
+ *
+ * Starts an additional poll task when v9fs_mux_calc_poll_procs() asks for
+ * more tasks than are running, then puts @m on the list of the first task
+ * that is below the per-task share (or on the last running task if all
+ * of them are at or above it) and prepares @m's poll table.
+ *
+ * v9fs_mux_task_lock is held for the whole update and released on every
+ * return path.
+ *
+ * Returns 0 on success, -ENOMEM if no poll task is available.
+ */
+static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
-dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
 {
-        dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
+        int i, n;
-                req->rcall);
+        struct v9fs_mux_poll_task *vpt, *vptlast;
+        struct task_struct *pproc;
+        dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
+                v9fs_mux_poll_task_num);
+        /* acquire (not release) the lock before touching the task table */
+        down(&v9fs_mux_task_lock);
+        n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
+        if (n > v9fs_mux_poll_task_num) {
+                for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+                        if (v9fs_mux_poll_tasks[i].task == NULL) {
+                                vpt = &v9fs_mux_poll_tasks[i];
+                                dprintk(DEBUG_MUX, "create proc %p\n", vpt);
+                                pproc = kthread_create(v9fs_poll_proc, vpt,
+                                                   "v9fs-poll");
+                                if (!IS_ERR(pproc)) {
+                                        vpt->task = pproc;
+                                        INIT_LIST_HEAD(&vpt->mux_list);
+                                        vpt->muxnum = 0;
+                                        v9fs_mux_poll_task_num++;
+                                        wake_up_process(vpt->task);
+                                }
+                                break;
+                        }
+                }
+                if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
+                        dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
+        }
+        if (!v9fs_mux_poll_task_num) {
+                /*
+                 * The first poll task could not be created; bail out
+                 * instead of dividing by zero below.
+                 */
+                up(&v9fs_mux_task_lock);
+                return -ENOMEM;
+        }
+        n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
+            ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
+        vptlast = NULL;
+        for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+                vpt = &v9fs_mux_poll_tasks[i];
+                if (vpt->task != NULL) {
+                        vptlast = vpt;
+                        if (vpt->muxnum < n) {
+                                dprintk(DEBUG_MUX, "put in proc %d\n", i);
+                                list_add(&m->mux_list, &vpt->mux_list);
+                                vpt->muxnum++;
+                                m->poll_task = vpt;
+                                memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+                                init_poll_funcptr(&m->pt, v9fs_pollwait);
+                                break;
+                        }
+                }
+        }
+        if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
+                /* every running task is at or above its share: use the last one */
+                if (vptlast == NULL) {
+                        up(&v9fs_mux_task_lock);
+                        return -ENOMEM;
+                }
+                dprintk(DEBUG_MUX, "put in proc %d\n", i);
+                list_add(&m->mux_list, &vptlast->mux_list);
+                vptlast->muxnum++;
+                m->poll_task = vptlast;
+                memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+                init_poll_funcptr(&m->pt, v9fs_pollwait);
+        }
+        v9fs_mux_num++;
+        up(&v9fs_mux_task_lock);
         return 0;
 }
+/**
+ * v9fs_mux_poll_stop - detaches a mux from its polling task
+ * @m: mux previously attached with v9fs_mux_poll_start
+ *
+ * Removes @m from the task's list and from every wait queue it was added
+ * to.  When the task is left without muxes it is sent SIGKILL and its
+ * slot is marked free.  v9fs_mux_task_lock is held for the whole update.
+ */
+static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
+{
+        int i;
+        struct v9fs_mux_poll_task *vpt;
+        /* acquire (not release) the lock before touching the task table */
+        down(&v9fs_mux_task_lock);
+        vpt = m->poll_task;
+        list_del(&m->mux_list);
+        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+                /*
+                 * v9fs_pollwait() stores ERR_PTR() in a slot when it is
+                 * given no wait address; such a slot was never queued and
+                 * must not be passed to remove_wait_queue().
+                 */
+                if (m->poll_waddr[i] != NULL && !IS_ERR(m->poll_waddr[i])) {
+                        remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
+                        m->poll_waddr[i] = NULL;
+                }
+        }
+        vpt->muxnum--;
+        if (!vpt->muxnum) {
+                dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
+                send_sig(SIGKILL, vpt->task, 1);
+                vpt->task = NULL;
+                v9fs_mux_poll_task_num--;
+        }
+        v9fs_mux_num--;
+        up(&v9fs_mux_task_lock);
+}
 /**
- * xread - force read of a certain number of bytes
+ * v9fs_mux_init - allocate and initialize the per-session mux data
- * @v9ses: session info structure
+ * Attaches the mux to a polling task, starting one when needed.
- * @ptr: pointer to buffer
- * @sz: number of bytes to read
  *
- * Chuck Cranor CS-533 project1
+ * @trans: transport structure
+ * @msize: maximum message size
+ * @extended: pointer to the extended flag
+ *
+ * Returns the new mux, or an ERR_PTR() value: -ENOMEM if the allocation
+ * fails, the error of v9fs_mux_poll_start(), or the error recorded by
+ * v9fs_pollwait() while the transport was polled for the first time.
+ * Nothing stays allocated or registered on failure.
  */
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
-static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+                                    unsigned char *extended)
 {
-        int rd = 0;
+        int i, n;
-        int ret = 0;
+        struct v9fs_mux_data *m, *mtmp;
-        while (rd < sz) {
-                ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
+        dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
-                if (ret <= 0) {
+        m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
-                        dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
+        if (!m)
-                        return ret;
+                return ERR_PTR(-ENOMEM);
+        spin_lock_init(&m->lock);
+        INIT_LIST_HEAD(&m->mux_list);
+        m->msize = msize;
+        m->extended = extended;
+        m->trans = trans;
+        idr_init(&m->tidpool.pool);
+        init_MUTEX(&m->tidpool.lock);
+        m->err = 0;
+        init_waitqueue_head(&m->equeue);
+        INIT_LIST_HEAD(&m->req_list);
+        INIT_LIST_HEAD(&m->unsent_req_list);
+        m->rcall = NULL;
+        m->rpos = 0;
+        m->rbuf = NULL;
+        m->wpos = m->wsize = 0;
+        m->wbuf = NULL;
+        INIT_WORK(&m->rq, v9fs_read_work, m);
+        INIT_WORK(&m->wq, v9fs_write_work, m);
+        m->wsched = 0;
+        memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+        m->poll_task = NULL;
+        n = v9fs_mux_poll_start(m);
+        if (n) {
+                /* m was not put on any poll task list, so just free it */
+                kfree(m);
+                return ERR_PTR(n);
+        }
+        n = trans->poll(trans, &m->pt);
+        if (n & POLLIN) {
+                dprintk(DEBUG_MUX, "mux %p can read\n", m);
+                set_bit(Rpending, &m->wsched);
+        }
+        if (n & POLLOUT) {
+                dprintk(DEBUG_MUX, "mux %p can write\n", m);
+                set_bit(Wpending, &m->wsched);
+        }
+        /*
+         * v9fs_pollwait() reports failure by storing ERR_PTR() in a slot.
+         * Remember the first such error and clear the slot, so that
+         * v9fs_mux_poll_stop() only sees wait queues that were really
+         * joined.
+         */
+        mtmp = NULL;
+        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+                if (IS_ERR(m->poll_waddr[i])) {
+                        if (!mtmp)
+                                mtmp = (void *)m->poll_waddr[i];        /* the error code */
+                        m->poll_waddr[i] = NULL;
                 }
-                rd += ret;
-                ptr += ret;
         }
+        if (mtmp) {
+                v9fs_mux_poll_stop(m);
+                kfree(m);
+                m = mtmp;
+        }
-        return (rd);
+        return m;
 }
 /**
- * read_message - read a full 9P2000 fcall packet
+ * v9fs_mux_destroy - cancels all pending requests and frees mux resources
- * @v9ses: session info structure
- * @rcall: fcall structure to read into
- * @rcalllen: size of fcall buffer
- *
+ * @m: mux to destroy; it is freed before the function returns
+ *
+ * Cancels the mux with -ECONNRESET and, if requests are still on
+ * req_list, waits on equeue (timeout argument 5000) for the list to
+ * empty.  The mux is then detached from its poll task and freed; the
+ * result of the wait is only logged, not acted upon.
  */
+void v9fs_mux_destroy(struct v9fs_mux_data *m)
+{
+        dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
+                m->mux_list.prev, m->mux_list.next);
+        v9fs_mux_cancel(m, -ECONNRESET);
+        if (!list_empty(&m->req_list)) {
+                /* wait until all processes waiting on this session exit */
+                dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
+                        m);
+                wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
+                dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
+                        list_empty(&m->req_list));
+        }
+        v9fs_mux_poll_stop(m);
+        m->trans = NULL;
+        kfree(m);
+}
-static int
+/**
-read_message(struct v9fs_session_info *v9ses,
+ * v9fs_pollwait - called by a file's poll operation to add the v9fs-poll
-             struct v9fs_fcall *rcall, int rcalllen)
+ *      task to the file's wait queue
+ *
+ * @filp: file being polled (not used here)
+ * @wait_address: wait queue the poll task should be added to
+ * @p: poll table embedded in the mux (v9fs_mux_data.pt)
+ *
+ * Uses the first free poll_waddr[] slot of the mux.  If every slot is
+ * taken the call is only logged.  If @wait_address is NULL the slot is
+ * set to ERR_PTR(-EIO) so that the caller of the poll can detect the
+ * failure.
+ */
+static void
+v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+              poll_table * p)
 {
-        unsigned char buf[4];
+        int i;
-        void *data;
+        struct v9fs_mux_data *m;
-        int size = 0;
-        int res = 0;
+        m = container_of(p, struct v9fs_mux_data, pt);
+        for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-        res = xread(v9ses, buf, sizeof(buf));
+                if (m->poll_waddr[i] == NULL)
-        if (res < 0) {
+                        break;
-                dprintk(DEBUG_ERROR,
-                        "Reading of count field failed returned: %d\n", res);
+        if (i >= ARRAY_SIZE(m->poll_waddr)) {
-                return res;
+                dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
+                return;
         }
-        if (res < 4) {
+        m->poll_waddr[i] = wait_address;
-                dprintk(DEBUG_ERROR,
-                        "Reading of count field failed returned: %d\n", res);
+        if (!wait_address) {
-                return -EIO;
+                dprintk(DEBUG_ERROR, "no wait_address\n");
+                m->poll_waddr[i] = ERR_PTR(-EIO);
+                return;
         }
-        size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
+        init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);     /* the waiter is the poll task, not current */
-        dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+        add_wait_queue(wait_address, &m->poll_wait[i]);
+}
+/**
+ * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
+ *
+ * @m: mux to poll
+ *
+ * Does nothing for a mux whose err is already negative.  If the
+ * transport's poll returns a negative value or reports POLLERR, POLLHUP
+ * or POLLNVAL, the mux is cancelled (with -ECONNRESET when poll gave no
+ * error code) and no work is scheduled.  Otherwise the Rpending/Wpending
+ * bits are set and the read/write work is queued unless it is already
+ * marked as scheduled; the write work is queued only when there is data
+ * left to send.
+ */
+static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
+{
+        int n;
-        /* adjust for the four bytes of size */
+        if (m->err < 0)
-        size -= 4;
+                return;
-        if (size > v9ses->maxdata) {
+        n = m->trans->poll(m->trans, NULL);
-                dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
+        if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-                return -E2BIG;
+                dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
+                if (n >= 0)
+                        n = -ECONNRESET;
+                v9fs_mux_cancel(m, n);
+                /* n now holds an errno, not poll bits: do not test it below */
+                return;
         }
-        data = kmalloc(size, GFP_KERNEL);
+        if (n & POLLIN) {
-        if (!data) {
+                set_bit(Rpending, &m->wsched);
-                eprintk(KERN_WARNING, "out of memory\n");
+                dprintk(DEBUG_MUX, "mux %p can read\n", m);
-                return -ENOMEM;
+                if (!test_and_set_bit(Rworksched, &m->wsched)) {
+                        dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+                        queue_work(v9fs_mux_wq, &m->rq);
+                }
         }
-        res = xread(v9ses, data, size);
+        if (n & POLLOUT) {
-        if (res < size) {
+                set_bit(Wpending, &m->wsched);
-                dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
+                dprintk(DEBUG_MUX, "mux %p can write\n", m);
-                        res);
+                if ((m->wsize || !list_empty(&m->unsent_req_list))
-                kfree(data);
+                    && !test_and_set_bit(Wworksched, &m->wsched)) {
-                return res;
+                        dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+                        queue_work(v9fs_mux_wq, &m->wq);
+                }
         }
+}
+/**
+ * v9fs_poll_proc - polls all v9fs transports for new events and queues
+ *      the appropriate work to the work queue
+ *
+ * @a: the struct v9fs_mux_poll_task slot served by this thread
+ *
+ * Thread function of a "v9fs-poll" kthread.  Each pass marks the task
+ * TASK_INTERRUPTIBLE, polls every mux on the slot's mux_list and then
+ * sleeps for up to SCHED_TIMEOUT * HZ.  The loop ends when
+ * kthread_should_stop() is true or a signal is pending (SIGKILL is
+ * allowed, which is how v9fs_mux_poll_stop() ends the thread).
+ *
+ * NOTE(review): mux_list is walked here without taking
+ * v9fs_mux_task_lock - confirm how this is serialized against
+ * v9fs_mux_poll_start()/v9fs_mux_poll_stop().
+ *
+ * Always returns 0.
+ */
+static int v9fs_poll_proc(void *a)
+{
+        struct v9fs_mux_data *m, *mtmp;
+        struct v9fs_mux_poll_task *vpt;
-        /* we now have an in-memory string that is the reply.
+        vpt = a;
-         * deserialize it. There is very little to go wrong at this point
+        dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
-         * save for v9fs_alloc errors.
+        allow_signal(SIGKILL);
-         */
+        while (!kthread_should_stop()) {
-        res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
+                set_current_state(TASK_INTERRUPTIBLE);
-                                     rcall, rcalllen);
+                if (signal_pending(current))
+                        break;
-        kfree(data);
+                list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
+                        v9fs_poll_mux(m);
+                }
-        if (res < 0)
+                dprintk(DEBUG_MUX, "sleeping...\n");
-                return res;
+                schedule_timeout(SCHED_TIMEOUT * HZ);
+        }
+        __set_current_state(TASK_RUNNING);
+        dprintk(DEBUG_MUX, "finish\n");
         return 0;
 }
 /**
- * v9fs_recv - receive an RPC response for a particular tag
+ * v9fs_write_work - called when a transport can send some data
- * @v9ses: session info structure
- * @req: RPC request structure
- *
+ *
+ * @a: the struct v9fs_mux_data the work was initialized with
+ *
+ * When no send is in progress (wsize == 0) the first request on
+ * unsent_req_list is moved to req_list and its serialized data becomes
+ * the write buffer.  A single transport write is then attempted:
+ *   -EAGAIN    the work stops and clears Wworksched;
+ *   <= 0       the mux is cancelled with that value;
+ *   otherwise  wpos advances, and once the buffer is fully sent the
+ *              work requeues itself if more requests are waiting and
+ *              the transport can take more data (Wpending was set in
+ *              the meantime, or poll reports POLLOUT).
+ * Every path that does not requeue the work clears Wworksched.
  */
+static void v9fs_write_work(void *a)
-static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
 {
-        int ret = 0;
+        int n, err;
+        struct v9fs_mux_data *m;
+        struct v9fs_req *req;
-        dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
+        m = a;
-        ret = wait_event_interruptible(v9ses->read_wait,
-                       ((v9ses->transport->status != Connected) ||
-                        (req->rcall != 0) || (req->err < 0) ||
-                        dprintcond(v9ses, req)));
-        dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+        if (m->err < 0) {
+                clear_bit(Wworksched, &m->wsched);
+                return;
+        }
-        spin_lock(&v9ses->muxlock);
+        if (!m->wsize) {
-        list_del(&req->next);
+                if (list_empty(&m->unsent_req_list)) {
-        spin_unlock(&v9ses->muxlock);
+                        clear_bit(Wworksched, &m->wsched);
+                        return;
+                }
-        if (req->err < 0)
+                spin_lock(&m->lock);
-                return req->err;
+                req =
+                    list_entry(m->unsent_req_list.next, struct v9fs_req,
+                               req_list);
+                list_move_tail(&req->req_list, &m->req_list);   /* from now on a reply can be matched to it */
+                m->wbuf = req->tcall->sdata;
+                m->wsize = req->tcall->size;
+                m->wpos = 0;
+                dump_data(m->wbuf, m->wsize);
+                spin_unlock(&m->lock);
+        }
-        if (v9ses->transport->status == Disconnected)
+        dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
-                return -ECONNRESET;
+        clear_bit(Wpending, &m->wsched);
+        err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
+        dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+        if (err == -EAGAIN) {
+                clear_bit(Wworksched, &m->wsched);
+                return;
+        }
-        return ret;
+        if (err <= 0)
-}
+                goto error;
-/**
+        m->wpos += err;
- * v9fs_send - send a 9P request
+        if (m->wpos == m->wsize)
- * @v9ses: session info structure
+                m->wpos = m->wsize = 0;
- * @req: RPC request to send
- *
+        if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
- */
+                if (test_and_clear_bit(Wpending, &m->wsched))
+                        n = POLLOUT;
+                else
+                        n = m->trans->poll(m->trans, NULL);
+                if (n & POLLOUT) {
+                        dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+                        queue_work(v9fs_mux_wq, &m->wq);
+                } else
+                        clear_bit(Wworksched, &m->wsched);
+        } else
+                clear_bit(Wworksched, &m->wsched);
+        return;
-static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+      error:
+        v9fs_mux_cancel(m, err);
+        clear_bit(Wworksched, &m->wsched);
+}
+/**
+ * process_request - completes a request whose reply has been attached
+ *
+ * @m: mux the request belongs to
+ * @req: request with req->rcall set; freed before the function returns
+ *
+ * For an Rerror reply (and no error recorded yet) req->err is taken from
+ * the numeric error code when the extended flag is set; if that leaves it
+ * zero, from v9fs_errstr2errno() on the error string; and finally falls
+ * back to -ESERVERFAULT.  A reply whose id is not the request id + 1 is
+ * turned into -EIO.
+ *
+ * The callback, if any, is then called with the request, the reply and
+ * the error, unless req->err is ERREQFLUSH.  When no callback is called
+ * the reply is freed here.  Finally the tag is released, waiters on
+ * m->equeue are woken and @req is freed.
+ */
+static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 {
-        int ret = -1;
+        int ecode, tag;
-        void *data = NULL;
+        struct v9fs_str *ename;
-        struct v9fs_fcall *tcall = req->tcall;
-        data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+        tag = req->tag;
-        if (!data)
+        if (req->rcall->id == RERROR && !req->err) {
-                return -ENOMEM;
+                ecode = req->rcall->params.rerror.errno;
+                ename = &req->rcall->params.rerror.error;
-        tcall->size = 0;        /* enforce size recalculation */
+                dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
-        ret =
-            v9fs_serialize_fcall(v9ses, tcall, data,
-                                 v9ses->maxdata + V9FS_IOHDRSZ);
-        if (ret < 0)
-                goto free_data;
-        spin_lock(&v9ses->muxlock);
+                if (*m->extended)
-        list_add(&req->next, &v9ses->mux_fcalls);
+                        req->err = -ecode;
-        spin_unlock(&v9ses->muxlock);
-        dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
+                if (!req->err) {
-                tcall->size);
+                        req->err = v9fs_errstr2errno(ename->str, ename->len);
-        ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
-        if (ret != tcall->size) {
+                        if (!req->err) {        /* string match failed */
-                spin_lock(&v9ses->muxlock);
+                                PRINT_FCALL_ERROR("unknown error", req->rcall);
-                list_del(&req->next);
+                        }
-                kfree(req->rcall);
+                        if (!req->err)
+                                req->err = -ESERVERFAULT;
+                }
+        } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
+                dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
+                        req->tcall->id + 1, req->rcall->id);
+                if (!req->err)
+                        req->err = -EIO;
+        }
-                spin_unlock(&v9ses->muxlock);
+        if (req->cb && req->err != ERREQFLUSH) {
-                if (ret >= 0)
+                dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
-                        ret = -EREMOTEIO;
+                        req->tcall, req->rcall);
+                (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+                req->cb = NULL;
         } else
-                ret = 0;
+                kfree(req->rcall);
-      free_data:
+        v9fs_mux_put_tag(m, tag);       /* tag was saved on entry */
-        kfree(data);
-        return ret;
+        wake_up(&m->equeue);
+        kfree(req);
 }
 /**
- * v9fs_mux_rpc - send a request, receive a response
+ * v9fs_read_work - called when there is some data to be read from a transport
- * @v9ses: session info structure
- * @tcall: fcall to send
- * @rcall: buffer to place response into
- *
+ *
+ * @a: the struct v9fs_mux_data the work was initialized with
+ *
+ * Reads once from the transport into the receive buffer (allocated on
+ * demand together with its v9fs_fcall) and then handles every complete
+ * message the buffer holds.  The first four bytes of a message are its
+ * little-endian size; a size of msize or more cancels the mux with -EIO.
+ * Each complete message is deserialized, any bytes that follow it are
+ * moved to a freshly allocated buffer, and the reply is handed to
+ * process_request() for the request on req_list with the same tag.
+ * Replies without a matching request are freed.
+ *
+ * If requests are still outstanding afterwards the work requeues itself
+ * when more data is available, otherwise Rworksched is cleared.  Read,
+ * allocation and decoding failures cancel the mux; no buffer is leaked
+ * on those paths.
  */
+static void v9fs_read_work(void *a)
-long
-v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
-             struct v9fs_fcall **rcall)
 {
-        int tid = -1;
+        int n, err;
-        struct v9fs_fcall *fcall = NULL;
+        struct v9fs_mux_data *m;
-        struct v9fs_rpcreq req;
+        struct v9fs_req *req, *rptr, *rreq;
-        int ret = -1;
+        struct v9fs_fcall *rcall;
+        char *rbuf;
-        if (!v9ses)
-                return -EINVAL;
+        m = a;
-        if (!v9ses->transport || v9ses->transport->status != Connected)
+        if (m->err < 0)
-                return -EIO;
+                return;
+        rcall = NULL;
+        dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+        if (!m->rcall) {
+                m->rcall =
+                    kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
+                if (!m->rcall) {
+                        err = -ENOMEM;
+                        goto error;
+                }
-        if (rcall)
+                m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
-                *rcall = NULL;
+                m->rpos = 0;
+        }
-        if (tcall->id != TVERSION) {
+        clear_bit(Rpending, &m->wsched);
-                tid = v9fs_get_idpool(&v9ses->tidpool);
+        err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
-                if (tid < 0)
+        dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-                        return -ENOMEM;
+        if (err == -EAGAIN) {
+                clear_bit(Rworksched, &m->wsched);
+                return;
         }
-        tcall->tag = tid;
+        if (err <= 0)
+                goto error;
-        req.tcall = tcall;
+        m->rpos += err;
-        req.err = 0;
+        while (m->rpos > 4) {
-        req.rcall = NULL;
+                n = le32_to_cpu(*(__le32 *) m->rbuf);
+                if (n >= m->msize) {
+                        dprintk(DEBUG_ERROR,
+                                "requested packet size too big: %d\n", n);
+                        err = -EIO;
+                        goto error;
+                }
-        ret = v9fs_send(v9ses, &req);
+                if (m->rpos < n)
+                        break;
-        if (ret < 0) {
+                dump_data(m->rbuf, n);
-                if (tcall->id != TVERSION)
+                err =
-                        v9fs_put_idpool(tid, &v9ses->tidpool);
+                    v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
-                dprintk(DEBUG_MUX, "error %d\n", ret);
+                if (err < 0) {
-                return ret;
+                        goto error;
-        }
+                }
+                rcall = m->rcall;
+                rbuf = m->rbuf;
+                if (m->rpos > n) {
+                        m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
+                                           GFP_KERNEL);
+                        if (!m->rcall) {
+                                /*
+                                 * The decoded reply is owned only by the
+                                 * local pointer at this point: free it and
+                                 * drop the pointer into its buffer before
+                                 * cancelling the mux.
+                                 */
+                                kfree(rcall);
+                                m->rbuf = NULL;
+                                m->rpos = 0;
+                                err = -ENOMEM;
+                                goto error;
+                        }
-        ret = v9fs_recv(v9ses, &req);
+                        m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
+                        memmove(m->rbuf, rbuf + n, m->rpos - n);
-        fcall = req.rcall;
+                        m->rpos -= n;
+                } else {
-        dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
+                        m->rcall = NULL;
-        if (ret == -ERESTARTSYS) {
+                        m->rbuf = NULL;
-                if (v9ses->transport->status != Disconnected
+                        m->rpos = 0;
-                    && tcall->id != TFLUSH) {
-                        unsigned long flags;
-                        dprintk(DEBUG_MUX, "flushing the tag: %d\n",
-                                tcall->tag);
-                        clear_thread_flag(TIF_SIGPENDING);
-                        v9fs_t_flush(v9ses, tcall->tag);
-                        spin_lock_irqsave(&current->sighand->siglock, flags);
-                        recalc_sigpending();
-                        spin_unlock_irqrestore(&current->sighand->siglock,
-                                               flags);
-                        dprintk(DEBUG_MUX, "flushing done\n");
                 }
-                goto release_req;
+                dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
-        } else if (ret < 0)
+                        rcall->tag);
-                goto release_req;
+                req = NULL;
-        if (!fcall)
+                spin_lock(&m->lock);
-                ret = -EIO;
+                list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-        else {
+                        if (rreq->tag == rcall->tag) {
-                if (fcall->id == RERROR) {
+                                req = rreq;
-                        ret = v9fs_errstr2errno(fcall->params.rerror.error);
+                                req->rcall = rcall;
-                        if (ret == 0) { /* string match failed */
+                                list_del(&req->req_list);
-                                if (fcall->params.rerror.errno)
+                                spin_unlock(&m->lock);
-                                        ret = -(fcall->params.rerror.errno);
+                                process_request(m, req);
-                                else
+                                break;
-                                        ret = -ESERVERFAULT;
                         }
-                } else if (fcall->id != tcall->id + 1) {
-                        dprintk(DEBUG_ERROR,
+                }
-                                "fcall mismatch: expected %d, got %d\n",
-                                tcall->id + 1, fcall->id);
+                if (!req) {
-                        ret = -EIO;
+                        spin_unlock(&m->lock);
+                        if (err >= 0 && rcall->id != RFLUSH)
+                                dprintk(DEBUG_ERROR,
+                                        "unexpected response mux %p id %d tag %d\n",
+                                        m, rcall->id, rcall->tag);
+                        kfree(rcall);
                 }
         }
-      release_req:
+        if (!list_empty(&m->req_list)) {
-        if (tcall->id != TVERSION)
+                if (test_and_clear_bit(Rpending, &m->wsched))
-                v9fs_put_idpool(tid, &v9ses->tidpool);
+                        n = POLLIN;
-        if (rcall)
+                else
-                *rcall = fcall;
+                        n = m->trans->poll(m->trans, NULL);
-        else
-                kfree(fcall);
+                if (n & POLLIN) {
+                        dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+                        queue_work(v9fs_mux_wq, &m->rq);
+                } else
+                        clear_bit(Rworksched, &m->wsched);
+        } else
+                clear_bit(Rworksched, &m->wsched);
+        return;
-        return ret;
+      error:
+        v9fs_mux_cancel(m, err);
+        clear_bit(Rworksched, &m->wsched);
 }
 /**
- * v9fs_mux_cancel_requests - cancels all pending requests
+ * v9fs_send_request - send 9P request
+ * The function can sleep until the request is scheduled for sending.
+ * The function can be interrupted. Return from the function is not
+ * a guarantee that the request is sent successfully. Can return errors
+ * that can be retrieved by PTR_ERR macros.
 *
- * @v9ses: session info structure
+ * @m: mux data
- * @err: error code to return to the requests
+ * @tc: request to be sent
+ * @cb: callback function to call when response is received
+ * @cba: parameter to pass to the callback function
 */
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
+                                          struct v9fs_fcall *tc,
+                                          v9fs_mux_req_callback cb, void *cba)
 {
-        struct v9fs_rpcreq *rptr;
+        int n;
-        struct v9fs_rpcreq *rreq;
+        struct v9fs_req *req;
-        dprintk(DEBUG_MUX, " %d\n", err);
+        dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-        spin_lock(&v9ses->muxlock);
+                tc, tc->id);
-        list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+        /* a mux that already failed refuses new requests with its stored error */
+        if (m->err < 0)
-                rreq->err = err;
+                return ERR_PTR(m->err);
-        }
-        spin_unlock(&v9ses->muxlock);
-        wake_up_all(&v9ses->read_wait);
-}
-/**
+        req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
- * v9fs_recvproc - kproc to handle demultiplexing responses
+        if (!req)
- * @data: session info structure
+                return ERR_PTR(-ENOMEM);
- *
- */
-static int v9fs_recvproc(void *data)
+        if (tc->id == TVERSION)
-{
+                n = V9FS_NOTAG;
-        struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
+        else {
-        struct v9fs_fcall *rcall = NULL;
+                n = v9fs_mux_get_tag(m);
+                /*
+                 * v9fs_mux_get_tag() returns a u16 and reports failure as
+                 * V9FS_NOTAG, so a "n < 0" test can never fire; compare
+                 * against the sentinel and release req on this error path.
+                 */
+                if ((u16) n == (u16) V9FS_NOTAG) {
+                        kfree(req);
+                        return ERR_PTR(-ENOMEM);
+                }
+        }
-        struct v9fs_rpcreq *rptr;
-        struct v9fs_rpcreq *req;
-        struct v9fs_rpcreq *rreq;
-        int err = 0;
-        allow_signal(SIGKILL);
-        set_current_state(TASK_INTERRUPTIBLE);
-        complete(&v9ses->proccmpl);
-        while (!kthread_should_stop() && err >= 0) {
-                req = rptr = rreq = NULL;
-                rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
-                if (!rcall) {
-                        eprintk(KERN_ERR, "no memory for buffers\n");
-                        break;
-                }
-                err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
+        v9fs_set_tag(tc, n);
-                spin_lock(&v9ses->muxlock);
-                if (err < 0) {
-                        list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-                                rreq->err = err;
-                        }
-                        if(err != -ERESTARTSYS)
-                                eprintk(KERN_ERR,
-                                        "Transport error while reading message %d\n", err);
-                } else {
-                        list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-                                if (rreq->tcall->tag == rcall->tag) {
-                                        req = rreq;
-                                        req->rcall = rcall;
-                                        break;
-                                }
-                        }
-                }
-                if (req && (req->tcall->id == TFLUSH)) {
+        req->tag = n;
-                        struct v9fs_rpcreq *treq = NULL;
+        req->tcall = tc;
-                        list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
+        req->rcall = NULL;
-                                if (treq->tcall->tag ==
+        req->err = 0;
-                                    req->tcall->params.tflush.oldtag) {
+        req->cb = cb;
-                                        list_del(&rptr->next);
+        req->cba = cba;
-                                        kfree(treq->rcall);
-                                        break;
+        spin_lock(&m->lock);
-                                }
+        list_add_tail(&req->req_list, &m->unsent_req_list);
+        spin_unlock(&m->lock);
+        /* use a remembered "writable" hint if there is one, else ask the transport */
+        if (test_and_clear_bit(Wpending, &m->wsched))
+                n = POLLOUT;
+        else
+                n = m->trans->poll(m->trans, NULL);
+        /* schedule the write work only if it is not already scheduled */
+        if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+                queue_work(v9fs_mux_wq, &m->wq);
+        return req;
+}
+/*
+ * Completion callback for the Tflush queued by v9fs_mux_flush_request().
+ * @a is the mux. Looks up the request whose tag equals the flushed tag on
+ * m->req_list, unlinks it, runs its callback (if any) with m->lock dropped,
+ * frees it, hands the flushed tag to v9fs_mux_put_tag() and frees both the
+ * Tflush call and its reply.
+ */
+static inline void
+v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
+                  int err)
+{
+        v9fs_mux_req_callback cb;
+        int tag;
+        struct v9fs_mux_data *m;
+        struct v9fs_req *req, *rptr;
+        m = a;
+        dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
+                rc, err, tc->params.tflush.oldtag);
+        spin_lock(&m->lock);
+        cb = NULL;
+        tag = tc->params.tflush.oldtag;
+        list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
+                if (req->tag == tag) {
+                        list_del(&req->req_list);
+                        if (req->cb) {
+                                cb = req->cb;
+                                req->cb = NULL;
+                                /* the lock is released here; cb != NULL records that */
+                                spin_unlock(&m->lock);
+                                (*cb) (req->cba, req->tcall, req->rcall,
+                                       req->err);
                         }
+                        kfree(req);
+                        wake_up(&m->equeue);
+                        break;
                 }
+        }
-                spin_unlock(&v9ses->muxlock);
+        /* still locked unless a callback was run above */
+        if (!cb)
+                spin_unlock(&m->lock);
-                if (!req) {
+        v9fs_mux_put_tag(m, tag);
-                        if (err >= 0)
+        kfree(tc);
-                                dprintk(DEBUG_ERROR,
+        kfree(rc);
-                                        "unexpected response: id %d tag %d\n",
+}
-                                        rcall->id, rcall->tag);
-                        kfree(rcall);
+/*
+ * Queue a Tflush for the tag of @req. The Tflush call is owned by
+ * v9fs_mux_flush_cb() once it has been queued; if it cannot be created or
+ * queued, nothing is sent and the call is released here.
+ */
+static void
-                }
+v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
+{
+        struct v9fs_fcall *fc;
+        struct v9fs_req *flush_req;
-                wake_up_all(&v9ses->read_wait);
+        dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
-                set_current_state(TASK_INTERRUPTIBLE);
+        fc = v9fs_create_tflush(req->tag);
+        /* v9fs_send_request() dereferences the call, so never pass an ERR_PTR */
+        if (IS_ERR(fc)) {
+                dprintk(DEBUG_MUX, "error %d\n", (int) PTR_ERR(fc));
+                return;
+        }
+        flush_req = v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
+        /* the callback that frees fc will never run if queueing failed */
+        if (IS_ERR(flush_req)) {
+                dprintk(DEBUG_MUX, "error %d\n", (int) PTR_ERR(flush_req));
+                kfree(fc);
+        }
+}
+/*
+ * Completion callback installed by v9fs_mux_rpc(). @a is the
+ * struct v9fs_mux_rpc declared on that function's stack; the response and
+ * error are stored in it and its wait queue is woken.
+ */
+static void
+v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
+{
+        struct v9fs_mux_rpc *r;
+        /*
+         * v9fs_mux_rpc() marks a request ERREQFLUSH when the wait was
+         * interrupted; @a is deliberately not touched in that case.
+         */
+        if (err == ERREQFLUSH) {
+                dprintk(DEBUG_MUX, "err req flush\n");
+                return;
         }
-        v9ses->transport->close(v9ses->transport);
+        r = a;
+        dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
+                tc, rc, err);
+        r->rcall = rc;
+        r->err = err;
+        wake_up(&r->wqueue);
+}
-        /* Inform all pending processes about the failure */
+/**
-        wake_up_all(&v9ses->read_wait);
+ * v9fs_mux_rpc - sends 9P request and waits until a response is available.
+ *      The function can be interrupted.
+ * @m: mux data
+ * @tc: request to be sent
+ * @rc: pointer where a pointer to the response is stored
+ */
+int
+v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+             struct v9fs_fcall **rc)
+{
+        int err;
+        unsigned long flags;
+        struct v9fs_req *req;
+        struct v9fs_mux_rpc r;
+        /* r is filled in by v9fs_mux_rpc_cb(), which receives &r as its cookie */
+        r.err = 0;
+        r.rcall = NULL;
+        r.m = m;
+        init_waitqueue_head(&r.wqueue);
+        if (rc)
+                *rc = NULL;
+        req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                dprintk(DEBUG_MUX, "error %d\n", err);
+                return PTR_ERR(req);
+        }
-        if (signal_pending(current))
+        r.req = req;
-                complete(&v9ses->proccmpl);
+        dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
+                req->tag, &r, req);
+        /* sleep until the callback stores a response or a negative error */
+        err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
+        if (r.err < 0)
+                err = r.err;
+        /*
+         * Interrupted while the transport is connected and the mux has no
+         * error: detach the call from the request, mark it ERREQFLUSH so
+         * v9fs_mux_rpc_cb() ignores it, and queue a flush for its tag.
+         */
+        if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
+                spin_lock(&m->lock);
+                req->tcall = NULL;
+                req->err = ERREQFLUSH;
+                spin_unlock(&m->lock);
+                /* NOTE(review): presumably cleared so the flush can be queued despite the pending signal - confirm */
+                clear_thread_flag(TIF_SIGPENDING);
+                v9fs_mux_flush_request(m, req);
+                /* recompute the pending-signal flag that was cleared above */
+                spin_lock_irqsave(&current->sighand->siglock, flags);
+                recalc_sigpending();
+                spin_unlock_irqrestore(&current->sighand->siglock, flags);
+        }
-        dprintk(DEBUG_MUX, "recvproc: end\n");
+        if (!err) {
-        v9ses->recvproc = NULL;
+                if (r.rcall)
+                        dprintk(DEBUG_MUX, "got response id %d tag %d\n",
+                                r.rcall->id, r.rcall->tag);
+                /* the response goes to the caller when it asked for it, else it is freed */
+                if (rc)
+                        *rc = r.rcall;
+                else
+                        kfree(r.rcall);
+        } else {
+                kfree(r.rcall);
+                dprintk(DEBUG_MUX, "got error %d\n", err);
+                /* positive codes are not passed on; they are reported as -EIO */
+                if (err > 0)
+                        err = -EIO;
+        }
-        return err >= 0;
+        return err;
 }
 /**
- * v9fs_mux_init - initialize multiplexer (spawn kproc)
+ * v9fs_mux_rpcnb - sends 9P request without waiting for response.
- * @v9ses: session info structure
+ * @m: mux data
- * @dev_name: mount device information (to create unique kproc)
+ * @tc: request to be sent
- *
+ * @cb: callback function to be called when response arrives
+ * @a: value to pass to the callback function
 */
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+                   v9fs_mux_req_callback cb, void *a)
+{
+        int rc;
+        struct v9fs_req *queued;
+        /* hand the call to the mux; the response is delivered through cb */
+        queued = v9fs_send_request(m, tc, cb, a);
+        if (!IS_ERR(queued)) {
+                dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, queued->tag);
+                return 0;
+        }
+        /* queueing failed: report the error encoded in the pointer */
+        rc = PTR_ERR(queued);
+        dprintk(DEBUG_MUX, "error %d\n", rc);
+        return rc;
+}
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+/**
+ * v9fs_mux_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
+ */
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
 {
-        char procname[60];
+        struct v9fs_req *req, *rtmp;
+        LIST_HEAD(cancel_list);
-        strncpy(procname, dev_name, sizeof(procname));
-        procname[sizeof(procname) - 1] = 0;
+        dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
+        /* remembered on the mux; v9fs_send_request() rejects new requests while it is negative */
+        m->err = err;
-        init_waitqueue_head(&v9ses->read_wait);
+        /*
+         * Move everything on req_list to a private list under the lock so the
+         * callbacks below run without m->lock held.
+         * NOTE(review): unsent_req_list is not drained here - confirm that is
+         * handled elsewhere.
+         */
+        spin_lock(&m->lock);
-        init_completion(&v9ses->fcread);
+        list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-        init_completion(&v9ses->proccmpl);
+                list_move(&req->req_list, &cancel_list);
-        spin_lock_init(&v9ses->muxlock);
-        INIT_LIST_HEAD(&v9ses->mux_fcalls);
-        v9ses->recvproc = NULL;
-        v9ses->curfcall = NULL;
-        v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
-                                         "v9fs_recvproc %s", procname);
-        if (IS_ERR(v9ses->recvproc)) {
-                eprintk(KERN_ERR, "cannot create receiving thread\n");
-                v9fs_session_close(v9ses);
-                return -ECONNABORTED;
         }
+        spin_unlock(&m->lock);
-        wake_up_process(v9ses->recvproc);
+        list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-        wait_for_completion(&v9ses->proccmpl);
+                list_del(&req->req_list);
+                /* an error already recorded on the request is kept */
+                if (!req->err)
+                        req->err = err;
-        return 0;
+                /* without a callback, the response is freed here */
+                if (req->cb)
+                        (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+                else
+                        kfree(req->rcall);
+                kfree(req);
+        }
+        wake_up(&m->equeue);
+}
+/* Take an id from the mux tag pool; V9FS_NOTAG is returned when none is available. */
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
+{
+        int id = v9fs_get_idpool(&m->tidpool);
+        if (id >= 0)
+                return (u16) id;
+        return V9FS_NOTAG;
+}
+/* Return a tag to the pool; V9FS_NOTAG and ids not found in the pool are ignored. */
+static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
+{
+        if (tag == V9FS_NOTAG)
+                return;
+        if (!v9fs_check_idpool(tag, &m->tidpool))
+                return;
+        v9fs_put_idpool(tag, &m->tidpool);
 }
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 4994cb10badf..9473b84f24b2 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -3,6 +3,7 @@
 *
 * Multiplexer Definitions
 *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *
 *  This program is free software; you can redistribute it and/or modify
@@ -23,19 +24,35 @@
 *
 */
-/* structure to manage each RPC transaction */
+struct v9fs_mux_data;
-struct v9fs_rpcreq {
+/**
-        struct v9fs_fcall *tcall;
+ * v9fs_mux_req_callback - callback function that is called when the
-        struct v9fs_fcall *rcall;
+ * response of a request is received. The callback is called from
-        int err;        /* error code if response failed */
+ * a workqueue and shouldn't block.
+ *
+ * @a - the pointer that was specified when the request was sent to be
+ *      passed to the callback
+ * @tc - request call
+ * @rc - response call
+ * @err - error code (non-zero if error occurred)
+ */
+typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
+        struct v9fs_fcall *rc, int err);
+int v9fs_mux_global_init(void);
+void v9fs_mux_global_exit(void);
-        /* XXX - could we put scatter/gather buffers here? */
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
+        unsigned char *extended);
+void v9fs_mux_destroy(struct v9fs_mux_data *);
-        struct list_head next;
+int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
-};
+struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
+int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+        v9fs_mux_req_callback cb, void *a);
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
+void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
-long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
-                  struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
+int v9fs_errstr2errno(char *errstr, int len);
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 63b58ce98ff4..1a28ef97a3d1 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -3,6 +3,7 @@
 *
 * File Descriptor Transport Layer
 *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
 *
 *  This program is free software; you can redistribute it and/or modify
@@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
                return -ENOPROTOOPT;
        }
-        sema_init(&trans->writelock, 1);
-        sema_init(&trans->readlock, 1);
        ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
        if (!ts)
@@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
        if (!trans)
                return;
-        trans->status = Disconnected;
+        ts = xchg(&trans->priv, NULL);
-        ts = trans->priv;
        if (!ts)
                return;
+        trans->status = Disconnected;
        if (ts->in_file)
                fput(ts->in_file);
@@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
        kfree(ts);
 }
+/*
+ * Poll the descriptor pair of an fd transport. Read readiness comes from
+ * in_file and write readiness from out_file when the two differ. Failures
+ * are reported as -EIO through the unsigned return value.
+ */
+static unsigned int
+v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
+{
+        int mask, wmask;
+        struct file *rd, *wr;
+        struct v9fs_trans_fd *fds;
+        mm_segment_t saved_fs;
+        if (!trans)
+                return -EIO;
+        fds = trans->priv;
+        if (!fds || trans->status != Connected)
+                return -EIO;
+        rd = fds->in_file;
+        wr = fds->out_file;
+        saved_fs = get_fs();
+        set_fs(get_ds());
+        mask = -EIO;
+        if (rd->f_op && rd->f_op->poll) {
+                mask = rd->f_op->poll(rd, pt);
+                if (wr != rd) {
+                        if (wr->f_op && wr->f_op->poll) {
+                                wmask = wr->f_op->poll(wr, pt);
+                                /* POLLOUT is taken from out_file, POLLIN from in_file */
+                                mask = (mask & ~POLLOUT) | (wmask & ~POLLIN);
+                        } else
+                                mask = -EIO;
+                }
+        }
+        set_fs(saved_fs);
+        return mask;
+}
 struct v9fs_transport v9fs_trans_fd = {
        .init = v9fs_fd_init,
        .write = v9fs_fd_send,
        .read = v9fs_fd_recv,
        .close = v9fs_fd_close,
+        .poll = v9fs_fd_poll,
 };
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index 6a9a75d40f73..44e830697acb 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -3,6 +3,7 @@
 *
 * Socket Transport Layer
 *
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
@@ -36,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <linux/file.h>
 #include "debug.h"
 #include "v9fs.h"
@@ -45,6 +47,7 @@
 struct v9fs_trans_sock {
         struct socket *s;
+        /* file used by the read, write and poll operations of this transport */
+        struct file *filp;
 };
 /**
@@ -57,41 +60,26 @@ struct v9fs_trans_sock {
 static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 {
-        struct msghdr msg;
+        int ret;
-        struct kvec iov;
+        struct v9fs_trans_sock *ts;
-        int result;
-        mm_segment_t oldfs;
-        struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
-        if (trans->status == Disconnected)
+        if (!trans || trans->status == Disconnected) {
+                dprintk(DEBUG_ERROR, "disconnected ...\n");
                 return -EREMOTEIO;
+        }
-        result = -EINVAL;
+        ts = trans->priv;
+        /* same guard as v9fs_sock_send(): never dereference a missing private area */
+        if (!ts) {
+                dprintk(DEBUG_ERROR, "no transport ...\n");
+                return -EREMOTEIO;
+        }
-        oldfs = get_fs();
-        set_fs(get_ds());
-        iov.iov_base = v;
-        iov.iov_len = len;
-        msg.msg_name = NULL;
-        msg.msg_namelen = 0;
-        msg.msg_iovlen = 1;
-        msg.msg_control = NULL;
-        msg.msg_controllen = 0;
-        msg.msg_namelen = 0;
-        msg.msg_flags = MSG_NOSIGNAL;
-        result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+        if (!(ts->filp->f_flags & O_NONBLOCK))
+                dprintk(DEBUG_ERROR, "blocking read ...\n");
-        dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
+        ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
-        set_fs(oldfs);
+        /* -ERESTARTSYS and -EAGAIN are transient; anything else <= 0 ends the connection */
+        if (ret <= 0) {
+                if (ret != -ERESTARTSYS && ret != -EAGAIN)
-        if (result <= 0) {
-                if (result != -ERESTARTSYS)
                         trans->status = Disconnected;
         }
-        return result;
+        return ret;
 }
 /**
@@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
 {
-        struct kvec iov;
+        int ret;
-        struct msghdr msg;
-        int result = -1;
         mm_segment_t oldfs;
-        struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+        struct v9fs_trans_sock *ts;
-        dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
+        if (!trans || trans->status == Disconnected) {
-        dump_data(v, len);
+                dprintk(DEBUG_ERROR, "disconnected ...\n");
+                return -EREMOTEIO;
+        }
+        ts = trans->priv;
+        if (!ts) {
+                dprintk(DEBUG_ERROR, "no transport ...\n");
+                return -EREMOTEIO;
+        }
-        down(&trans->writelock);
+        /* only logged; the write is still attempted on a blocking file */
+        if (!(ts->filp->f_flags & O_NONBLOCK))
+                dprintk(DEBUG_ERROR, "blocking write ...\n");
         oldfs = get_fs();
         set_fs(get_ds());
-        iov.iov_base = v;
+        /* v is passed through a __user cast while the address limit is switched above */
+        ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
-        iov.iov_len = len;
-        msg.msg_name = NULL;
-        msg.msg_namelen = 0;
-        msg.msg_iovlen = 1;
-        msg.msg_control = NULL;
-        msg.msg_controllen = 0;
-        msg.msg_namelen = 0;
-        msg.msg_flags = MSG_NOSIGNAL;
-        result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
         set_fs(oldfs);
-        if (result < 0) {
+        /* NOTE(review): unlike v9fs_sock_recv(), -EAGAIN is treated as a disconnect here - confirm this is intended */
+        if (ret < 0) {
-                if (result != -ERESTARTSYS)
+                if (ret != -ERESTARTSYS)
                         trans->status = Disconnected;
         }
-        up(&trans->writelock);
+        return ret;
-        return result;
+}
+/*
+ * Poll the file backing a socket transport. Failures are reported as -EIO
+ * through the unsigned return value.
+ */
+static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
+        struct poll_table_struct *pt)
+{
+        int mask;
+        struct file *sockfile;
+        struct v9fs_trans_sock *sock;
+        mm_segment_t saved_fs;
+        if (!trans) {
+                dprintk(DEBUG_ERROR, "no transport\n");
+                return -EIO;
+        }
+        sock = trans->priv;
+        if (!sock || trans->status != Connected) {
+                dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
+                return -EIO;
+        }
+        sockfile = sock->filp;
+        saved_fs = get_fs();
+        set_fs(get_ds());
+        if (sockfile->f_op && sockfile->f_op->poll)
+                mask = sockfile->f_op->poll(sockfile, pt);
+        else {
+                dprintk(DEBUG_ERROR, "no poll operation\n");
+                mask = -EIO;
+        }
+        set_fs(saved_fs);
+        return mask;
 }
 /**
 * v9fs_tcp_init - initialize TCP socket
 * @v9ses: session information
@@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
        int rc = 0;
        struct v9fs_trans_sock *ts = NULL;
        struct v9fs_transport *trans = v9ses->transport;
+        int fd;
-        sema_init(&trans->writelock, 1);
+        trans->status = Disconnected;
-        sema_init(&trans->readlock, 1);
        ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
@@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
        trans->priv = ts;
        ts->s = NULL;
+        ts->filp = NULL;
        if (!addr)
                return -EINVAL;
@@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
                return rc;
        }
        csocket->sk->sk_allocation = GFP_NOIO;
+        fd = sock_map_fd(csocket);
+        if (fd < 0) {
+                sock_release(csocket);
+                kfree(ts);
+                trans->priv = NULL;
+                return fd;
+        }
        ts->s = csocket;
+        ts->filp = fget(fd);
+        ts->filp->f_flags |= O_NONBLOCK;
        trans->status = Connected;
        return 0;
@@ -203,7 +235,7 @@ static int
 v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
               char *data)
 {
-        int rc;
+        int rc, fd;
        struct socket *csocket;
        struct sockaddr_un sun_server;
        struct v9fs_transport *trans;
@@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
        csocket = NULL;
        trans = v9ses->transport;
+        trans->status = Disconnected;
        if (strlen(dev_name) > UNIX_PATH_MAX) {
                eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
                        dev_name);
@@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
        trans->priv = ts;
        ts->s = NULL;
+        ts->filp = NULL;
-        sema_init(&trans->writelock, 1);
-        sema_init(&trans->readlock, 1);
        sun_server.sun_family = PF_UNIX;
        strcpy(sun_server.sun_path, dev_name);
@@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
                return rc;
        }
        csocket->sk->sk_allocation = GFP_NOIO;
+        fd = sock_map_fd(csocket);
+        if (fd < 0) {
+                sock_release(csocket);
+                kfree(ts);
+                trans->priv = NULL;
+                return fd;
+        }
        ts->s = csocket;
+        ts->filp = fget(fd);
+        ts->filp->f_flags |= O_NONBLOCK;
        trans->status = Connected;
        return 0;
@@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
        ts = trans->priv;
-        if ((ts) && (ts->s)) {
+        if ((ts) && (ts->filp)) {
-                dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
+                fput(ts->filp);
-                sock_release(ts->s);
+                ts->filp = NULL;
                ts->s = NULL;
                trans->status = Disconnected;
-                dprintk(DEBUG_TRANS, "socket closed\n");
        }
        kfree(ts);
@@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = {
        .write = v9fs_sock_send,
        .read = v9fs_sock_recv,
        .close = v9fs_sock_close,
+        .poll = v9fs_sock_poll,
 };
 struct v9fs_transport v9fs_trans_unix = {
@@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = {
        .write = v9fs_sock_send,
        .read = v9fs_sock_recv,
        .close = v9fs_sock_close,
+        .poll = v9fs_sock_poll,
 };
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 9e9cd418efd5..91fcdb94b361 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -3,6 +3,7 @@
 *
 * Transport Definition
 *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
 *
 *  This program is free software; you can redistribute it and/or modify
@@ -31,14 +32,13 @@ enum v9fs_transport_status {
 struct v9fs_transport {
         enum v9fs_transport_status status;
-        struct semaphore writelock;
-        struct semaphore readlock;
         void *priv;
         int (*init) (struct v9fs_session_info *, const char *, char *);
         int (*write) (struct v9fs_transport *, void *, int);
         int (*read) (struct v9fs_transport *, void *, int);
         void (*close) (struct v9fs_transport *);
+        /* returns a poll event mask; the fd and socket transports return -EIO on failure */
+        unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
 };
 extern struct v9fs_transport v9fs_trans_tcp;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 418c3743fdee..5250c428fc1f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -37,7 +37,6 @@
 #include "v9fs_vfs.h"
 #include "transport.h"
 #include "mux.h"
-#include "conv.h"
 /* TODO: sysfs or debugfs interface */
 int v9fs_debug_level = 0;       /* feature-rific global debug level  */
@@ -213,7 +212,8 @@ retry:
                return -1;
        }
-        error = idr_get_new(&p->pool, NULL, &i);
+        /* no need to store exactly p, we just need something non-null */
+        error = idr_get_new(&p->pool, p, &i);
        up(&p->lock);
        if (error == -EAGAIN)
@@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p)
 }
 /**
+ * v9fs_check_idpool - check if the specified id is present in the pool
+ * @id - id to check
+ * @p - pool
+ */
+int v9fs_check_idpool(int id, struct v9fs_idpool *p)
+{
+        /* non-zero when the idr holds an entry for id */
+        void *entry = idr_find(&p->pool, id);
+        return entry ? 1 : 0;
+}
+/**
 * v9fs_session_init - initialize session
 * @v9ses: session information structure
 * @dev_name: device being mounted
@@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        int n = 0;
        int newfid = -1;
        int retval = -EINVAL;
+        struct v9fs_str *version;
        v9ses->name = __getname();
        if (!v9ses->name)
@@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        /* id pools that are session-dependent: FIDs and TIDs */
        idr_init(&v9ses->fidpool.pool);
        init_MUTEX(&v9ses->fidpool.lock);
-        idr_init(&v9ses->tidpool.pool);
-        init_MUTEX(&v9ses->tidpool.lock);
        switch (v9ses->proto) {
        case PROTO_TCP:
@@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        v9ses->shutdown = 0;
        v9ses->session_hung = 0;
-        if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+        v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
+                &v9ses->extended);
+        if (IS_ERR(v9ses->mux)) {
+                retval = PTR_ERR(v9ses->mux);
+                v9ses->mux = NULL;
                dprintk(DEBUG_ERROR, "problem initializing mux\n");
                goto SessCleanUp;
        }
@@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
                        goto FreeFcall;
                }
-                /* Really should check for 9P1 and report error */
+                version = &fcall->params.rversion.version;
-                if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+                if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
                        dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
                        v9ses->extended = 1;
-                } else {
+                } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
                        dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
                        v9ses->extended = 0;
+                } else {
+                        retval = -EREMOTEIO;
+                        goto FreeFcall;
                }
                n = fcall->params.rversion.msize;
@@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        }
        if (v9ses->afid != ~0) {
-                if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+                if (v9fs_t_clunk(v9ses, v9ses->afid))
                        dprintk(DEBUG_ERROR, "clunk failed\n");
        }
@@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 void v9fs_session_close(struct v9fs_session_info *v9ses)
 {
-        if (v9ses->recvproc) {
+        if (v9ses->mux) {
-                send_sig(SIGKILL, v9ses->recvproc, 1);
+                v9fs_mux_destroy(v9ses->mux);
-                wait_for_completion(&v9ses->proccmpl);
+                v9ses->mux = NULL;
        }
-        if (v9ses->transport)
+        if (v9ses->transport) {
                v9ses->transport->close(v9ses->transport);
+                kfree(v9ses->transport);
+                v9ses->transport = NULL;
+        }
        __putname(v9ses->name);
        __putname(v9ses->remotename);
@@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 *      and cancel all pending requests.
 */
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+        dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
         v9ses->transport->status = Disconnected;
-        v9fs_mux_cancel_requests(v9ses, -EIO);
+        /* every request pending on the mux is completed with -EIO */
+        v9fs_mux_cancel(v9ses->mux, -EIO);
 }
 extern int v9fs_error_init(void);
@@ -433,11 +453,17 @@ extern int v9fs_error_init(void);
 static int __init init_v9fs(void)
 {
+        int ret;
         v9fs_error_init();
         printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
-        return register_filesystem(&v9fs_fs_type);
+        ret = v9fs_mux_global_init();
+        if (ret)
+                return ret;
+        ret = register_filesystem(&v9fs_fs_type);
+        /* undo the mux setup so a failed registration leaves nothing behind */
+        if (ret)
+                v9fs_mux_global_exit();
+        return ret;
 }
 /**
@@ -447,6 +473,7 @@ static int __init init_v9fs(void)
 static void __exit exit_v9fs(void)
 {
+        /* NOTE(review): init_v9fs() sets up the mux before registering the filesystem; teardown here is not in the reverse order - confirm this is safe */
+        v9fs_mux_global_exit();
         unregister_filesystem(&v9fs_fs_type);
 }
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 45dcef42bdd6..f337da7a0eec 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -57,24 +57,14 @@ struct v9fs_session_info {
        /* book keeping */
        struct v9fs_idpool fidpool;     /* The FID pool for file descriptors */
-        struct v9fs_idpool tidpool;     /* The TID pool for transactions ids */
-        /* transport information */
        struct v9fs_transport *transport;
+        struct v9fs_mux_data *mux;
        int inprogress;         /* session in progress => true */
        int shutdown;           /* session shutting down. no more attaches. */
        unsigned char session_hung;
+        struct dentry *debugfs_dir;
-        /* mux private data */
-        struct v9fs_fcall *curfcall;
-        wait_queue_head_t read_wait;
-        struct completion fcread;
-        struct completion proccmpl;
-        struct task_struct *recvproc;
-        spinlock_t muxlock;
-        struct list_head mux_fcalls;
 };
 /* possible values of ->proto */
@@ -84,11 +74,14 @@ enum {
        PROTO_FD,
 };
+extern struct dentry *v9fs_debugfs_root;
 int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
 struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
 int v9fs_get_idpool(struct v9fs_idpool *p);
 void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+int v9fs_check_idpool(int id, struct v9fs_idpool *p);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_MAGIC 0x01021997
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 2f2cea7ee3e7..69cf2905dc90 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -39,15 +39,15 @@
 */
 extern struct file_system_type v9fs_fs_type;
+extern struct address_space_operations v9fs_addr_operations;
 extern struct file_operations v9fs_file_operations;
 extern struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct v9fs_qid *qid);
-void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
+void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
-                       struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
 void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
new file mode 100644
index 000000000000..8100fb5171b7
--- /dev/null
+++ b/fs/9p/vfs_addr.c
@@ -0,0 +1,109 @@
+/*
+ *  linux/fs/9p/vfs_addr.c
+ *
+ * This file contains vfs address (mmap) ops for 9P2000.
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/pagemap.h>
+#include <linux/idr.h>
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+/**
+ * v9fs_vfs_readpage - read an entire page in from 9P
+ *
+ * @filp: file being read
+ * @page: page to fill with the data read from the file
+ *
+ */
+static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+{
+        char *buffer = NULL;
+        int retval = -EIO;
+        loff_t offset = page_offset(page);
+        int count = PAGE_CACHE_SIZE;
+        struct inode *inode = filp->f_dentry->d_inode;
+        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+        int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+        struct v9fs_fid *v9f = filp->private_data;
+        struct v9fs_fcall *fcall = NULL;
+        int fid = v9f->fid;
+        int total = 0;
+        int result = 0;
+        buffer = kmap(page);
+        do {
+                if (count < rsize)
+                        rsize = count;
+                result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
+                if (result < 0) {
+                        printk(KERN_ERR "v9fs_t_read returned %d\n",
+                               result);
+                        kfree(fcall);
+                        goto UnmapAndUnlock;
+                } else
+                        offset += result;
+                memcpy(buffer, fcall->params.rread.data, result);
+                count -= result;
+                buffer += result;
+                total += result;
+                kfree(fcall);
+                if (result < rsize)
+                        break;
+        } while (count);
+        memset(buffer, 0, count);
+        flush_dcache_page(page);
+        SetPageUptodate(page);
+        retval = 0;
+UnmapAndUnlock:
+        kunmap(page);
+        unlock_page(page);
+        return retval;
+}
+struct address_space_operations v9fs_addr_operations = {
+      .readpage = v9fs_vfs_readpage,
+};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a6aa947de0f9..2dd806dac9f1 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 /**
@@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
 void v9fs_dentry_release(struct dentry *dentry)
 {
+        int err;
        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
        if (dentry->d_fsdata != NULL) {
                struct list_head *fid_list = dentry->d_fsdata;
                struct v9fs_fid *temp = NULL;
                struct v9fs_fid *current_fid = NULL;
-                struct v9fs_fcall *fcall = NULL;
                list_for_each_entry_safe(current_fid, temp, fid_list, list) {
-                        if (v9fs_t_clunk
+                        err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
-                            (current_fid->v9ses, current_fid->fid, &fcall))
-                                dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                                        FCALL_ERROR(fcall));
-                        v9fs_put_idpool(current_fid->fid,
+                        if (err < 0)
-                                        &current_fid->v9ses->fidpool);
+                                dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
+                                        err, dentry->d_iname);
-                        kfree(fcall);
                        v9fs_fid_destroy(current_fid);
                }
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 57a43b8feef5..ae6d032b9b59 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -37,8 +37,8 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "v9fs_vfs.h"
 #include "conv.h"
+#include "v9fs_vfs.h"
 #include "fid.h"
 /**
@@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode *inode = filp->f_dentry->d_inode;
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
        struct v9fs_fid *file = filp->private_data;
-        unsigned int i, n;
+        unsigned int i, n, s;
        int fid = -1;
        int ret = 0;
-        struct v9fs_stat *mi = NULL;
+        struct v9fs_stat stat;
        int over = 0;
        dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
        fid = file->fid;
-        mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
-        if (!mi)
-                return -ENOMEM;
        if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
                kfree(file->rdir_fcall);
                file->rdir_fcall = NULL;
@@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
                n = file->rdir_fcall->params.rread.count;
                i = file->rdir_fpos;
                while (i < n) {
-                        int s = v9fs_deserialize_stat(v9ses,
+                        s = v9fs_deserialize_stat(
-                                  file->rdir_fcall->params.rread.data + i,
+                                file->rdir_fcall->params.rread.data + i,
-                                  n - i, mi, v9ses->maxdata);
+                                n - i, &stat, v9ses->extended);
                        if (s == 0) {
                                dprintk(DEBUG_ERROR,
-                                        "error while deserializing mistat\n");
+                                        "error while deserializing stat\n");
                                ret = -EIO;
                                goto FreeStructs;
                        }
-                        over = filldir(dirent, mi->name, strlen(mi->name),
+                        over = filldir(dirent, stat.name.str, stat.name.len,
-                                    filp->f_pos, v9fs_qid2ino(&mi->qid),
+                                    filp->f_pos, v9fs_qid2ino(&stat.qid),
-                                    dt_type(mi));
+                                    dt_type(&stat));
                        if (over) {
                                file->rdir_fpos = i;
@@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
        while (!over) {
                ret = v9fs_t_read(v9ses, fid, filp->f_pos,
-                                            v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+                        v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
                if (ret < 0) {
                        dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
                                ret, fcall);
@@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
                n = ret;
                i = 0;
                while (i < n) {
-                        int s = v9fs_deserialize_stat(v9ses,
+                        s = v9fs_deserialize_stat(fcall->params.rread.data + i,
-                                  fcall->params.rread.data + i, n - i, mi,
+                                n - i, &stat, v9ses->extended);
-                                  v9ses->maxdata);
                        if (s == 0) {
                                dprintk(DEBUG_ERROR,
-                                        "error while deserializing mistat\n");
+                                        "error while deserializing stat\n");
                                return -EIO;
                        }
-                        over = filldir(dirent, mi->name, strlen(mi->name),
+                        over = filldir(dirent, stat.name.str, stat.name.len,
-                                    filp->f_pos, v9fs_qid2ino(&mi->qid),
+                                    filp->f_pos, v9fs_qid2ino(&stat.qid),
-                                    dt_type(mi));
+                                    dt_type(&stat));
                        if (over) {
                                file->rdir_fcall = fcall;
@@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
      FreeStructs:
        kfree(fcall);
-        kfree(mi);
        return ret;
 }
@@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
                fid->fid);
        fidnum = fid->fid;
-        filemap_fdatawrite(inode->i_mapping);
+        filemap_write_and_wait(inode->i_mapping);
-        filemap_fdatawait(inode->i_mapping);
        if (fidnum >= 0) {
                dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
                        fid->fid);
-                if (v9fs_t_clunk(v9ses, fidnum, NULL))
+                if (v9fs_t_clunk(v9ses, fidnum))
                        dprintk(DEBUG_ERROR, "clunk failed\n");
-                v9fs_put_idpool(fid->fid, &v9ses->fidpool);
                kfree(fid->rdir_fcall);
                kfree(fid);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 89c849da8504..c7e14d917215 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
+#include <linux/version.h>
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
@@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
                if (result < 0) {
-                        dprintk(DEBUG_ERROR,
+                        PRINT_FCALL_ERROR("open failed", fcall);
-                                "open failed, open_mode 0x%x: %s\n", open_mode,
-                                FCALL_ERROR(fcall));
                        kfree(fcall);
                        return result;
                }
@@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
                return -ENOLCK;
        if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
-                filemap_fdatawrite(inode->i_mapping);
+                filemap_write_and_wait(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
                invalidate_inode_pages(&inode->i_data);
        }
@@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
        int result = -EIO;
        int rsize = 0;
        int total = 0;
-        char *buf;
        dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
                (int)*offset);
@@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
        if (v9fid->iounit != 0 && rsize > v9fid->iounit)
                rsize = v9fid->iounit;
-        buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
        do {
                if (count < rsize)
                        rsize = count;
-                result = copy_from_user(buf, data, rsize);
+                result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
-                if (result) {
-                        dprintk(DEBUG_ERROR, "Problem copying from user\n");
-                        kfree(buf);
-                        return -EFAULT;
-                }
-                dump_data(buf, rsize);
-                result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
                if (result < 0) {
-                        eprintk(KERN_ERR, "error while writing: %s(%d)\n",
+                        PRINT_FCALL_ERROR("error while writing", fcall);
-                                FCALL_ERROR(fcall), result);
                        kfree(fcall);
-                        kfree(buf);
                        return result;
                } else
                        *offset += result;
@@ -306,7 +289,9 @@ v9fs_file_write(struct file *filp, const char __user * data,
                total += result;
        } while (count);
-        kfree(buf);
+        if(inode->i_mapping->nrpages)
+                invalidate_inode_pages2(inode->i_mapping);
        return total;
 }
@@ -317,4 +302,5 @@ struct file_operations v9fs_file_operations = {
        .open = v9fs_file_open,
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock,
+        .mmap = generic_file_mmap,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0ea965c3bb7d..91f552454c76 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 static struct inode_operations v9fs_dir_inode_operations;
@@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 }
 /**
- * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * v9fs_blank_wstat - helper function to setup a 9P stat structure
 * @v9ses: 9P session info (for determining extended mode)
- * @mistat: structure to initialize
+ * @wstat: structure to initialize
 *
 */
 static void
-v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+v9fs_blank_wstat(struct v9fs_wstat *wstat)
 {
-        mistat->type = ~0;
+        wstat->type = ~0;
-        mistat->dev = ~0;
+        wstat->dev = ~0;
-        mistat->qid.type = ~0;
+        wstat->qid.type = ~0;
-        mistat->qid.version = ~0;
+        wstat->qid.version = ~0;
-        *((long long *)&mistat->qid.path) = ~0;
+        *((long long *)&wstat->qid.path) = ~0;
-        mistat->mode = ~0;
+        wstat->mode = ~0;
-        mistat->atime = ~0;
+        wstat->atime = ~0;
-        mistat->mtime = ~0;
+        wstat->mtime = ~0;
-        mistat->length = ~0;
+        wstat->length = ~0;
-        mistat->name = mistat->data;
+        wstat->name = NULL;
-        mistat->uid = mistat->data;
+        wstat->uid = NULL;
-        mistat->gid = mistat->data;
+        wstat->gid = NULL;
-        mistat->muid = mistat->data;
+        wstat->muid = NULL;
-        if (v9ses->extended) {
+        wstat->n_uid = ~0;
-                mistat->n_uid = ~0;
+        wstat->n_gid = ~0;
-                mistat->n_gid = ~0;
+        wstat->n_muid = ~0;
-                mistat->n_muid = ~0;
+        wstat->extension = NULL;
-                mistat->extension = mistat->data;
-        }
-        *mistat->data = 0;
-}
-/**
- * v9fs_mistat2unix - convert mistat to unix stat
- * @mistat: Plan 9 metadata (mistat) structure
- * @buf: unix metadata (stat) structure to populate
- * @sb: superblock
- *
- */
-static void
-v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
-                 struct super_block *sb)
-{
-        struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
-        buf->st_nlink = 1;
-        buf->st_atime = mistat->atime;
-        buf->st_mtime = mistat->mtime;
-        buf->st_ctime = mistat->mtime;
-        buf->st_uid = (unsigned short)-1;
-        buf->st_gid = (unsigned short)-1;
-        if (v9ses && v9ses->extended) {
-                /* TODO: string to uid mapping via user-space daemon */
-                if (mistat->n_uid != -1)
-                        sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
-                if (mistat->n_gid != -1)
-                        sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
-        }
-        if (buf->st_uid == (unsigned short)-1)
-                buf->st_uid = v9ses->uid;
-        if (buf->st_gid == (unsigned short)-1)
-                buf->st_gid = v9ses->gid;
-        buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
-        if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
-                char type = 0;
-                int major = -1;
-                int minor = -1;
-                sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
-                switch (type) {
-                case 'c':
-                        buf->st_mode &= ~S_IFBLK;
-                        buf->st_mode |= S_IFCHR;
-                        break;
-                case 'b':
-                        break;
-                default:
-                        dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
-                                type, mistat->extension);
-                };
-                buf->st_rdev = MKDEV(major, minor);
-        } else
-                buf->st_rdev = 0;
-        buf->st_size = mistat->length;
-        buf->st_blksize = sb->s_blocksize;
-        buf->st_blocks =
-            (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
 }
 /**
@@ -246,6 +177,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
                inode->i_blocks = 0;
                inode->i_rdev = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                inode->i_mapping->a_ops = &v9fs_addr_operations;
                switch (mode & S_IFMT) {
                case S_IFIFO:
@@ -312,12 +244,12 @@ v9fs_create(struct inode *dir,
        struct inode *file_inode = NULL;
        struct v9fs_fcall *fcall = NULL;
        struct v9fs_qid qid;
-        struct stat newstat;
        int dirfidnum = -1;
        long newfid = -1;
        int result = 0;
        unsigned int iounit = 0;
        int wfidno = -1;
+        int err;
        perm = unixmode2p9mode(v9ses, perm);
@@ -349,57 +281,64 @@ v9fs_create(struct inode *dir,
        result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
        if (result < 0) {
-                dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+                PRINT_FCALL_ERROR("clone error", fcall);
                v9fs_put_idpool(newfid, &v9ses->fidpool);
                newfid = -1;
                goto CleanUpFid;
        }
        kfree(fcall);
+        fcall = NULL;
        result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
                               perm, open_mode, &fcall);
        if (result < 0) {
-                dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
+                PRINT_FCALL_ERROR("create fails", fcall);
-                        FCALL_ERROR(fcall), result);
                goto CleanUpFid;
        }
        iounit = fcall->params.rcreate.iounit;
        qid = fcall->params.rcreate.qid;
        kfree(fcall);
+        fcall = NULL;
-        fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
+        if (!(perm&V9FS_DMDIR)) {
-        dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
+                fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
-        if (!fid) {
+                dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
-                result = -ENOMEM;
+                if (!fid) {
-                goto CleanUpFid;
+                        result = -ENOMEM;
-        }
+                        goto CleanUpFid;
+                }
-        fid->qid = qid;
+                fid->qid = qid;
-        fid->iounit = iounit;
+                fid->iounit = iounit;
+        } else {
+                err = v9fs_t_clunk(v9ses, newfid);
+                newfid = -1;
+                if (err < 0)
+                        dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err);
+        }
        /* walk to the newly created file and put the fid in the dentry */
        wfidno = v9fs_get_idpool(&v9ses->fidpool);
-        if (newfid < 0) {
+        if (wfidno < 0) {
                eprintk(KERN_WARNING, "no free fids available\n");
                return -ENOSPC;
        }
        result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
-                (char *) file_dentry->d_name.name, NULL);
+                (char *) file_dentry->d_name.name, &fcall);
        if (result < 0) {
-                dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+                PRINT_FCALL_ERROR("clone error", fcall);
                v9fs_put_idpool(wfidno, &v9ses->fidpool);
                wfidno = -1;
                goto CleanUpFid;
        }
+        kfree(fcall);
+        fcall = NULL;
        if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
-                if (!v9fs_t_clunk(v9ses, newfid, &fcall)) {
+                v9fs_put_idpool(wfidno, &v9ses->fidpool);
-                        v9fs_put_idpool(wfidno, &v9ses->fidpool);
-                }
                goto CleanUpFid;
        }
@@ -409,62 +348,43 @@ v9fs_create(struct inode *dir,
            (perm & V9FS_DMDEVICE))
                return 0;
-        result = v9fs_t_stat(v9ses, newfid, &fcall);
+        result = v9fs_t_stat(v9ses, wfidno, &fcall);
        if (result < 0) {
-                dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
+                PRINT_FCALL_ERROR("stat error", fcall);
-                        result);
                goto CleanUpFid;
        }
-        v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
-        file_inode = v9fs_get_inode(sb, newstat.st_mode);
+        file_inode = v9fs_get_inode(sb,
+                p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode));
        if ((!file_inode) || IS_ERR(file_inode)) {
                dprintk(DEBUG_ERROR, "create inode failed\n");
                result = -EBADF;
                goto CleanUpFid;
        }
-        v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+        v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb);
        kfree(fcall);
        fcall = NULL;
        file_dentry->d_op = &v9fs_dentry_operations;
        d_instantiate(file_dentry, file_inode);
-        if (perm & V9FS_DMDIR) {
-                if (!v9fs_t_clunk(v9ses, newfid, &fcall))
-                        v9fs_put_idpool(newfid, &v9ses->fidpool);
-                else
-                        dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
-                                FCALL_ERROR(fcall));
-                kfree(fcall);
-                fid->fidopen = 0;
-                fid->fidcreate = 0;
-                d_drop(file_dentry);
-        }
        return 0;
      CleanUpFid:
        kfree(fcall);
+        fcall = NULL;
        if (newfid >= 0) {
-                if (!v9fs_t_clunk(v9ses, newfid, &fcall))
+                err = v9fs_t_clunk(v9ses, newfid);
-                        v9fs_put_idpool(newfid, &v9ses->fidpool);
+                if (err < 0)
-                else
+                        dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
-                        dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                                FCALL_ERROR(fcall));
-                kfree(fcall);
        }
        if (wfidno >= 0) {
-                if (!v9fs_t_clunk(v9ses, wfidno, &fcall))
+                err = v9fs_t_clunk(v9ses, wfidno);
-                        v9fs_put_idpool(wfidno, &v9ses->fidpool);
+                if (err < 0)
-                else
+                        dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
-                        dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                                FCALL_ERROR(fcall));
-                kfree(fcall);
        }
        return result;
 }
@@ -509,10 +429,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
        }
        result = v9fs_t_remove(v9ses, fid, &fcall);
-        if (result < 0)
+        if (result < 0) {
-                dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
+                PRINT_FCALL_ERROR("remove fails", fcall);
-                        FCALL_ERROR(fcall), result);
+        } else {
-        else {
                v9fs_put_idpool(fid, &v9ses->fidpool);
                v9fs_fid_destroy(v9fid);
        }
@@ -567,7 +486,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
        struct v9fs_fid *fid;
        struct inode *inode;
        struct v9fs_fcall *fcall = NULL;
-        struct stat newstat;
        int dirfidnum = -1;
        int newfid = -1;
        int result = 0;
@@ -620,8 +538,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
-        v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+        inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
-        inode = v9fs_get_inode(sb, newstat.st_mode);
+                fcall->params.rstat.stat.mode));
        if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
                eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
@@ -631,7 +549,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
-        inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+        inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
        fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
        if (fid == NULL) {
@@ -640,10 +558,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
-        fid->qid = fcall->params.rstat.stat->qid;
+        fid->qid = fcall->params.rstat.stat.qid;
        dentry->d_op = &v9fs_dentry_operations;
-        v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+        v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
        d_add(dentry, inode);
        kfree(fcall);
@@ -699,7 +617,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            v9fs_fid_lookup(old_dentry->d_parent);
        struct v9fs_fid *newdirfid =
            v9fs_fid_lookup(new_dentry->d_parent);
-        struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+        struct v9fs_wstat wstat;
        struct v9fs_fcall *fcall = NULL;
        int fid = -1;
        int olddirfidnum = -1;
@@ -708,9 +626,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dprintk(DEBUG_VFS, "\n");
-        if (!mistat)
-                return -ENOMEM;
        if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
                dprintk(DEBUG_ERROR, "problem with arguments\n");
                return -EBADF;
@@ -734,33 +649,22 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto FreeFcallnBail;
        }
-        v9fs_blank_mistat(v9ses, mistat);
+        v9fs_blank_wstat(&wstat);
+        wstat.muid = v9ses->name;
+        wstat.name = (char *) new_dentry->d_name.name;
-        strcpy(mistat->data + 1, v9ses->name);
+        retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
-        mistat->name = mistat->data + 1 + strlen(v9ses->name);
-        if (new_dentry->d_name.len >
-            (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
-                dprintk(DEBUG_ERROR, "new name too long\n");
-                goto FreeFcallnBail;
-        }
-        strcpy(mistat->name, new_dentry->d_name.name);
-        retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
      FreeFcallnBail:
-        kfree(mistat);
        if (retval < 0)
-                dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+                PRINT_FCALL_ERROR("wstat error", fcall);
-                        FCALL_ERROR(fcall));
        kfree(fcall);
        return retval;
 }
 /**
- * v9fs_vfs_getattr - retreive file metadata
+ * v9fs_vfs_getattr - retrieve file metadata
 * @mnt - mount information
 * @dentry - file to get attributes on
 * @stat - metadata structure to populate
@@ -788,7 +692,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        if (err < 0)
                dprintk(DEBUG_ERROR, "stat error\n");
        else {
-                v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+                v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
                                  dentry->d_inode->i_sb);
                generic_fillattr(dentry->d_inode, stat);
        }
@@ -809,57 +713,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
        struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
        struct v9fs_fcall *fcall = NULL;
-        struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+        struct v9fs_wstat wstat;
        int res = -EPERM;
        dprintk(DEBUG_VFS, "\n");
-        if (!mistat)
-                return -ENOMEM;
        if (!fid) {
                dprintk(DEBUG_ERROR,
                        "Couldn't find fid associated with dentry\n");
                return -EBADF;
        }
-        v9fs_blank_mistat(v9ses, mistat);
+        v9fs_blank_wstat(&wstat);
        if (iattr->ia_valid & ATTR_MODE)
-                mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+                wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode);
        if (iattr->ia_valid & ATTR_MTIME)
-                mistat->mtime = iattr->ia_mtime.tv_sec;
+                wstat.mtime = iattr->ia_mtime.tv_sec;
        if (iattr->ia_valid & ATTR_ATIME)
-                mistat->atime = iattr->ia_atime.tv_sec;
+                wstat.atime = iattr->ia_atime.tv_sec;
        if (iattr->ia_valid & ATTR_SIZE)
-                mistat->length = iattr->ia_size;
+                wstat.length = iattr->ia_size;
        if (v9ses->extended) {
-                char *ptr = mistat->data+1;
+                if (iattr->ia_valid & ATTR_UID)
+                        wstat.n_uid = iattr->ia_uid;
-                if (iattr->ia_valid & ATTR_UID) {
-                        mistat->uid = ptr;
-                        ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
-                        mistat->n_uid = iattr->ia_uid;
-                }
-                if (iattr->ia_valid & ATTR_GID) {
+                if (iattr->ia_valid & ATTR_GID)
-                        mistat->gid = ptr;
+                        wstat.n_gid = iattr->ia_gid;
-                        ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
-                        mistat->n_gid = iattr->ia_gid;
-                }
        }
-        res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+        res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
        if (res < 0)
-                dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+                PRINT_FCALL_ERROR("wstat error", fcall);
-        kfree(mistat);
        kfree(fcall);
        if (res >= 0)
                res = inode_setattr(dentry->d_inode, iattr);
@@ -867,51 +758,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 }
 /**
- * v9fs_mistat2inode - populate an inode structure with mistat info
+ * v9fs_stat2inode - populate an inode structure with stat info
- * @mistat: Plan 9 metadata (mistat) structure
+ * @stat: Plan 9 metadata (stat) structure
 * @inode: inode to populate
 * @sb: superblock of filesystem
 *
 */
 void
-v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
+v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
-                  struct super_block *sb)
+        struct super_block *sb)
 {
+        int n;
+        char ext[32];
        struct v9fs_session_info *v9ses = sb->s_fs_info;
        inode->i_nlink = 1;
-        inode->i_atime.tv_sec = mistat->atime;
+        inode->i_atime.tv_sec = stat->atime;
-        inode->i_mtime.tv_sec = mistat->mtime;
+        inode->i_mtime.tv_sec = stat->mtime;
-        inode->i_ctime.tv_sec = mistat->mtime;
+        inode->i_ctime.tv_sec = stat->mtime;
-        inode->i_uid = -1;
+        inode->i_uid = v9ses->uid;
-        inode->i_gid = -1;
+        inode->i_gid = v9ses->gid;
        if (v9ses->extended) {
-                /* TODO: string to uid mapping via user-space daemon */
+                inode->i_uid = stat->n_uid;
-                inode->i_uid = mistat->n_uid;
+                inode->i_gid = stat->n_gid;
-                inode->i_gid = mistat->n_gid;
-                if (mistat->n_uid == -1)
-                        sscanf(mistat->uid, "%x", &inode->i_uid);
-                if (mistat->n_gid == -1)
-                        sscanf(mistat->gid, "%x", &inode->i_gid);
        }
-        if (inode->i_uid == -1)
+        inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-                inode->i_uid = v9ses->uid;
-        if (inode->i_gid == -1)
-                inode->i_gid = v9ses->gid;
-        inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
        if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
                char type = 0;
                int major = -1;
                int minor = -1;
-                sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+                n = stat->extension.len;
+                if (n > sizeof(ext)-1)
+                        n = sizeof(ext)-1;
+                memmove(ext, stat->extension.str, n);
+                ext[n] = 0;
+                sscanf(ext, "%c %u %u", &type, &major, &minor);
                switch (type) {
                case 'c':
                        inode->i_mode &= ~S_IFBLK;
@@ -920,14 +807,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
                case 'b':
                        break;
                default:
-                        dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+                        dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
-                                type, mistat->extension);
+                                type, stat->extension.len, stat->extension.str);
                };
                inode->i_rdev = MKDEV(major, minor);
        } else
                inode->i_rdev = 0;
-        inode->i_size = mistat->length;
+        inode->i_size = stat->length;
        inode->i_blksize = sb->s_blocksize;
        inode->i_blocks =
@@ -955,71 +842,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
 }
 /**
- * v9fs_vfs_symlink - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See 9P2000.u RFC for more information
- *
- */
-static int
-v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
-        int retval = -EPERM;
-        struct v9fs_fid *newfid;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-        struct v9fs_fcall *fcall = NULL;
-        struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
-                symname);
-        if (!mistat)
-                return -ENOMEM;
-        if (!v9ses->extended) {
-                dprintk(DEBUG_ERROR, "not extended\n");
-                goto FreeFcall;
-        }
-        /* issue a create */
-        retval = v9fs_create(dir, dentry, S_IFLNK, 0);
-        if (retval != 0)
-                goto FreeFcall;
-        newfid = v9fs_fid_lookup(dentry);
-        /* issue a twstat */
-        v9fs_blank_mistat(v9ses, mistat);
-        strcpy(mistat->data + 1, symname);
-        mistat->extension = mistat->data + 1;
-        retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-        if (retval < 0) {
-                dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                        FCALL_ERROR(fcall));
-                goto FreeFcall;
-        }
-        kfree(fcall);
-        if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-                dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                        FCALL_ERROR(fcall));
-                goto FreeFcall;
-        }
-        d_drop(dentry);         /* FID - will this also clunk? */
-      FreeFcall:
-        kfree(mistat);
-        kfree(fcall);
-        return retval;
-}
-/**
 * v9fs_readlink - read a symlink's location (internal version)
 * @dentry: dentry for symlink
 * @buffer: buffer to load symlink location into
@@ -1058,16 +880,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
        if (!fcall)
                return -EIO;
-        if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+        if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
                retval = -EINVAL;
                goto FreeFcall;
        }
        /* copy extension buffer into buffer */
-        if (strlen(fcall->params.rstat.stat->extension) < buflen)
+        if (fcall->params.rstat.stat.extension.len < buflen)
-                buflen = strlen(fcall->params.rstat.stat->extension);
+                buflen = fcall->params.rstat.stat.extension.len + 1;
-        memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
+        /*
+         * buflen includes room for the terminating NUL: copy buflen-1
+         * bytes of the (non NUL-terminated) extension, then terminate.
+         */
+        memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+        buffer[buflen-1] = 0;
        retval = buflen;
@@ -1157,6 +980,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
                __putname(s);
 }
+/**
+ * v9fs_vfs_mkspecial - create a file and set its extension string via wstat
+ * @dir: inode of the directory the file is created in
+ * @dentry: dentry of the file to create
+ * @mode: mode handed to v9fs_create()
+ * @extension: string sent in the extension field of the Twstat
+ *
+ * Only available on extended sessions; returns -EPERM otherwise.
+ * Creates the file, looks up the fid left behind by the create, issues a
+ * Twstat carrying @extension, clunks the fid and drops the dentry.
+ *
+ * Returns the v9fs_create()/v9fs_t_wstat() result, or -EINVAL when the fid
+ * of the newly created file cannot be found.  A failing clunk is only
+ * logged and does not change the return value.
+ */
+static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
+        int mode, const char *extension)
+{
+        int err, retval;
+        struct v9fs_session_info *v9ses;
+        struct v9fs_fcall *fcall;
+        struct v9fs_fid *fid;
+        struct v9fs_wstat wstat;
+        v9ses = v9fs_inode2v9ses(dir);
+        retval = -EPERM;
+        fcall = NULL;
+        if (!v9ses->extended) {
+                dprintk(DEBUG_ERROR, "not extended\n");
+                goto free_mem;
+        }
+        /* issue a create */
+        retval = v9fs_create(dir, dentry, mode, 0);
+        if (retval != 0)
+                goto free_mem;
+        fid = v9fs_fid_get_created(dentry);
+        if (!fid) {
+                dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+                /* retval is 0 from the create; don't report success */
+                retval = -EINVAL;
+                goto free_mem;
+        }
+        /* issue a Twstat */
+        v9fs_blank_wstat(&wstat);
+        wstat.muid = v9ses->name;
+        wstat.extension = (char *) extension;
+        retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
+        if (retval < 0) {
+                PRINT_FCALL_ERROR("wstat error", fcall);
+                goto free_mem;
+        }
+        /* clunk result is kept in err so it cannot clobber retval */
+        err = v9fs_t_clunk(v9ses, fid->fid);
+        if (err < 0) {
+                dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
+                goto free_mem;
+        }
+        d_drop(dentry);         /* FID - will this also clunk? */
+free_mem:
+        kfree(fcall);
+        return retval;
+}
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * Logs the request and hands it to v9fs_vfs_mkspecial() with mode S_IFLNK,
+ * passing @symname as the extension string.  Returns whatever
+ * v9fs_vfs_mkspecial() returns.
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+                symname);
+        return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
+}
 /**
 * v9fs_vfs_link - create a hardlink
 * @old_dentry: dentry for file to link to
@@ -1173,64 +1067,24 @@ static int
 v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
              struct dentry *dentry)
 {
-        int retval = -EPERM;
+        int retval;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+        struct v9fs_fid *oldfid;
-        struct v9fs_fcall *fcall = NULL;
+        char *name;
-        struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-        struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
-        struct v9fs_fid *newfid = NULL;
-        char *symname = __getname();
        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
                old_dentry->d_name.name);
-        if (!v9ses->extended) {
+        oldfid = v9fs_fid_lookup(old_dentry);
-                dprintk(DEBUG_ERROR, "not extended\n");
+        if (!oldfid) {
-                goto FreeMem;
+                dprintk(DEBUG_ERROR, "can't find oldfid\n");
-        }
+                return -EPERM;
-        /* get fid of old_dentry */
-        sprintf(symname, "hardlink(%d)\n", oldfid->fid);
-        /* issue a create */
-        retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
-        if (retval != 0)
-                goto FreeMem;
-        newfid = v9fs_fid_lookup(dentry);
-        if (!newfid) {
-                dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
-                goto FreeMem;
-        }
-        /* issue a twstat */
-        v9fs_blank_mistat(v9ses, mistat);
-        strcpy(mistat->data + 1, symname);
-        mistat->extension = mistat->data + 1;
-        retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-        if (retval < 0) {
-                dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                        FCALL_ERROR(fcall));
-                goto FreeMem;
-        }
-        kfree(fcall);
-        if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-                dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                        FCALL_ERROR(fcall));
-                goto FreeMem;
        }
-        d_drop(dentry);         /* FID - will this also clunk? */
+        name = __getname();
+        /* __getname() may fail; never pass a NULL buffer to sprintf() */
+        if (!name)
+                return -ENOMEM;
+        sprintf(name, "hardlink(%d)\n", oldfid->fid);
-        kfree(fcall);
+        retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
-        fcall = NULL;
+        __putname(name);
-      FreeMem:
-        kfree(mistat);
-        kfree(fcall);
-        __putname(symname);
        return retval;
 }
@@ -1246,82 +1100,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 static int
 v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
-        int retval = -EPERM;
+        int retval;
-        struct v9fs_fid *newfid;
+        char *name;
-        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-        struct v9fs_fcall *fcall = NULL;
-        struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-        char *symname = __getname();
        dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
                dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
-        if (!mistat)
+        if (!new_valid_dev(rdev))
-                return -ENOMEM;
+                return -EINVAL;
-        if (!new_valid_dev(rdev)) {
-                retval = -EINVAL;
-                goto FreeMem;
-        }
-        if (!v9ses->extended) {
-                dprintk(DEBUG_ERROR, "not extended\n");
-                goto FreeMem;
-        }
-        /* issue a create */
-        retval = v9fs_create(dir, dentry, mode, 0);
-        if (retval != 0)
-                goto FreeMem;
-        newfid = v9fs_fid_lookup(dentry);
-        if (!newfid) {
-                dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
-                retval = -EINVAL;
-                goto FreeMem;
-        }
+        name = __getname();
+        /* __getname() may fail; the extension is written into name below */
+        if (!name)
+                return -ENOMEM;
        /* build extension */
        if (S_ISBLK(mode))
-                sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+                sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
        else if (S_ISCHR(mode))
-                sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+                sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
        else if (S_ISFIFO(mode))
-                ;       /* DO NOTHING */
+                *name = 0;
        else {
-                retval = -EINVAL;
+                __putname(name);
-                goto FreeMem;
+                return -EINVAL;
-        }
-        if (!S_ISFIFO(mode)) {
-                /* issue a twstat */
-                v9fs_blank_mistat(v9ses, mistat);
-                strcpy(mistat->data + 1, symname);
-                mistat->extension = mistat->data + 1;
-                retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-                if (retval < 0) {
-                        dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                                FCALL_ERROR(fcall));
-                        goto FreeMem;
-                }
        }
-        /* need to update dcache so we show up */
+        retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
-        kfree(fcall);
+        __putname(name);
-        if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-                dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                        FCALL_ERROR(fcall));
-                goto FreeMem;
-        }
-        d_drop(dentry);         /* FID - will this also clunk? */
-      FreeMem:
-        kfree(mistat);
-        kfree(fcall);
-        __putname(symname);
        return retval;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 82c5b0084079..2c4fa75be025 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -44,7 +44,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 static void v9fs_clear_inode(struct inode *);
@@ -92,7 +91,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
        sb->s_op = &v9fs_super_ops;
        sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
-            MS_NODIRATIME | MS_NOATIME;
+            MS_NOATIME;
 }
 /**
@@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
        dprintk(DEBUG_VFS, " \n");
-        v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+        v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
        if (!v9ses)
                return ERR_PTR(-ENOMEM);
        if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
                dprintk(DEBUG_ERROR, "problem initiating session\n");
+                kfree(v9ses);
                return ERR_PTR(newfid);
        }
@@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
        stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
        if (stat_result < 0) {
                dprintk(DEBUG_ERROR, "stat error\n");
-                v9fs_t_clunk(v9ses, newfid, NULL);
+                v9fs_t_clunk(v9ses, newfid);
                v9fs_put_idpool(newfid, &v9ses->fidpool);
        } else {
                /* Setup the Root Inode */
@@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type
                        goto put_back_sb;
                }
-                root_fid->qid = fcall->params.rstat.stat->qid;
+                root_fid->qid = fcall->params.rstat.stat.qid;
                root->d_inode->i_ino =
-                    v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+                    v9fs_qid2ino(&fcall->params.rstat.stat.qid);
-                v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+                v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
        }
        kfree(fcall);
diff --git a/fs/Kconfig b/fs/Kconfig
index 382e3b2883d5..ef78e3a42d32 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -798,7 +798,7 @@ config PROC_KCORE
 config PROC_VMCORE
        bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP
+        depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
        help
        Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 175b2e8177c1..f3d3d81eb7e9 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
 config BINFMT_ELF
        bool "Kernel support for ELF binaries"
-        depends on MMU
+        depends on MMU && (BROKEN || !FRV)
        default y
        ---help---
          ELF (Executable and Linkable Format) is a format for libraries and
diff --git a/fs/Makefile b/fs/Makefile
index 73676111ebbe..1db711319c80 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,11 +10,11 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
                ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
                seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-                ioprio.o pnode.o
+                ioprio.o pnode.o drop_caches.o
 obj-$(CONFIG_INOTIFY)           += inotify.o
 obj-$(CONFIG_EPOLL)             += eventpoll.o
-obj-$(CONFIG_COMPAT)            += compat.o
+obj-$(CONFIG_COMPAT)            += compat.o compat_ioctl.o
 nfsd-$(CONFIG_NFSD)             := nfsctl.o
 obj-y                           += $(nfsd-y) $(nfsd-m)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 9ebe881c6786..44d439cb69f4 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -244,10 +244,10 @@ affs_put_inode(struct inode *inode)
        pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
        affs_free_prealloc(inode);
        if (atomic_read(&inode->i_count) == 1) {
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
                if (inode->i_size != AFFS_I(inode)->mmu_private)
                        affs_truncate(inode);
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
        }
 }
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 0a57fd7c726f..9eef6bf156ab 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -118,7 +118,7 @@ static int kafscmd(void *arg)
        _SRXAFSCM_xxxx_t func;
        int die;
-        printk("kAFS: Started kafscmd %d\n", current->pid);
+        printk(KERN_INFO "kAFS: Started kafscmd %d\n", current->pid);
        daemonize("kafscmd");
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6682d6d7f294..5c61c24dab2a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 #endif
        /* determine how many magic numbers there should be in this page */
-        latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT);
+        latter = dir->i_size - page_offset(page);
        if (latter >= PAGE_SIZE)
                qty = PAGE_SIZE;
        else
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index 1e691889c4c9..bfdcf19ba3f3 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -18,8 +18,6 @@
 #include "kafsasyncd.h"
 #include "cache.h"
-#define __packed __attribute__((packed))
 typedef enum {
        AFS_VLUPD_SLEEP,                /* sleeping waiting for update timer to fire */
        AFS_VLUPD_PENDING,              /* on pending queue */
@@ -115,7 +113,7 @@ struct afs_volume
        struct cachefs_cookie   *cache;         /* caching cookie */
 #endif
        afs_volid_t             vid;            /* volume ID */
-        afs_voltype_t __packed  type;           /* type of volume */
+        afs_voltype_t           type;           /* type of volume */
        char                    type_force;     /* force volume type (suppress R/O -> R/W) */
        unsigned short          nservers;       /* number of server slots filled */
        unsigned short          rjservers;      /* number of servers discarded due to -ENOMEDIUM */
diff --git a/fs/aio.c b/fs/aio.c
index 5a28b69ad223..aec2b1916d1b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,7 +29,6 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
-#include <linux/rcuref.h>
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
        /* Must be done under the lock to serialise against cancellation.
         * Call this aio_fput as it duplicates fput via the fput_work.
         */
-        if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
+        if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
                get_ioctx(ctx);
                spin_lock(&fput_lock);
                list_add(&req->ki_list, &fput_head);
diff --git a/fs/attr.c b/fs/attr.c
index 67bcd9b14ea5..97de94670878 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -10,11 +10,11 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/smp_lock.h>
+#include <linux/capability.h>
 #include <linux/fsnotify.h>
 #include <linux/fcntl.h>
 #include <linux/quotaops.h>
 #include <linux/security.h>
-#include <linux/time.h>
 /* Taken over from the old code... */
@@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
+        if (ia_valid & ATTR_SIZE &&
-        if (ia_valid & ATTR_SIZE) {
+            attr->ia_size != i_size_read(inode)) {
-                if (attr->ia_size != i_size_read(inode)) {
+                int error = vmtruncate(inode, attr->ia_size);
-                        error = vmtruncate(inode, attr->ia_size);
+                if (error)
-                        if (error || (ia_valid == ATTR_SIZE))
+                        return error;
-                                goto out;
-                } else {
-                        /*
-                         * We skipped the truncate but must still update
-                         * timestamps
-                         */
-                        ia_valid |= ATTR_MTIME|ATTR_CTIME;
-                }
        }
        if (ia_valid & ATTR_UID)
@@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
                inode->i_mode = mode;
        }
        mark_inode_dirty(inode);
-out:
-        return error;
+        return 0;
 }
 EXPORT_SYMBOL(inode_setattr);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a1ab1c0ed215..870e2cf33016 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -10,6 +10,7 @@
 *
 * ------------------------------------------------------------------------- */
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
 #include <linux/param.h>
@@ -229,9 +230,9 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
        dentry->d_flags |= DCACHE_AUTOFS_PENDING;
        d_add(dentry, NULL);
-        up(&dir->i_sem);
+        mutex_unlock(&dir->i_mutex);
        autofs_revalidate(dentry, nd);
-        down(&dir->i_sem);
+        mutex_lock(&dir->i_mutex);
        /*
         * If we are still pending, check if we had to handle
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fca83e28edcf..385bed09b0d8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry)
        struct dentry *child;
        int ret = 0;
-        list_for_each_entry(child, &dentry->d_subdirs, d_child)
+        list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
                if (simple_positive(child))
                        goto out;
        ret = 1;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index feb6ac427d05..dc39589df165 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -105,7 +105,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-                struct dentry *dentry = list_entry(next, struct dentry, d_child);
+                struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
                /* Negative dentry - give up */
                if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@ resume:
        }
        if (this_parent != top) {
-                next = this_parent->d_child.next;
+                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -163,7 +163,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-                struct dentry *dentry = list_entry(next, struct dentry, d_child);
+                struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
                /* Negative dentry - give up */
                if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@ cont:
        }
        if (this_parent != parent) {
-                next = this_parent->d_child.next;
+                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
        /* On exit from the loop expire is set to a dgot dentry
         * to expire or it's NULL */
        while ( next != &root->d_subdirs ) {
-                struct dentry *dentry = list_entry(next, struct dentry, d_child);
+                struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
                /* Negative dentry - give up */
                if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@ next:
                        expired, (int)expired->d_name.len, expired->d_name.name);
                spin_lock(&dcache_lock);
                list_del(&expired->d_parent->d_subdirs);
-                list_add(&expired->d_parent->d_subdirs, &expired->d_child);
+                list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
                spin_unlock(&dcache_lock);
                return expired;
        }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 818b37be5153..2d3082854a29 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -91,7 +91,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-                struct dentry *dentry = list_entry(next, struct dentry, d_child);
+                struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
                /* Negative dentry - don`t care */
                if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@ resume:
        if (this_parent != sbi->root) {
                struct dentry *dentry = this_parent;
-                next = this_parent->d_child.next;
+                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                spin_unlock(&dcache_lock);
                DPRINTK("parent dentry %p %.*s",
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2a771ec66956..62d8d4acb8bb 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -12,6 +12,7 @@
 *
 * ------------------------------------------------------------------------- */
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
 #include <linux/param.h>
@@ -86,7 +87,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 /* Update usage from here to top of tree, so that scan of
   top-level directories will give a useful result */
-static void autofs4_update_usage(struct dentry *dentry)
+static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
 {
        struct dentry *top = dentry->d_sb->s_root;
@@ -95,7 +96,7 @@ static void autofs4_update_usage(struct dentry *dentry)
                struct autofs_info *ino = autofs4_dentry_ino(dentry);
                if (ino) {
-                        update_atime(dentry->d_inode);
+                        touch_atime(mnt, dentry);
                        ino->last_used = jiffies;
                }
        }
@@ -143,7 +144,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f
                        }
                        while(1) {
-                                struct dentry *de = list_entry(list, struct dentry, d_child);
+                                struct dentry *de = list_entry(list,
+                                                struct dentry, d_u.d_child);
                                if (!d_unhashed(de) && de->d_inode) {
                                        spin_unlock(&dcache_lock);
@@ -193,6 +195,8 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
                if (!empty)
                        d_invalidate(dentry);
+                nd.dentry = dentry;
+                nd.mnt = mnt;
                nd.flags = LOOKUP_DIRECTORY;
                status = (dentry->d_op->d_revalidate)(dentry, &nd);
@@ -288,10 +292,10 @@ out:
        return autofs4_dcache_readdir(file, dirent, filldir);
 }
-static int try_to_fill_dentry(struct dentry *dentry, 
+static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
-                              struct super_block *sb,
-                              struct autofs_sb_info *sbi, int flags)
 {
+        struct super_block *sb = mnt->mnt_sb;
+        struct autofs_sb_info *sbi = autofs4_sbi(sb);
        struct autofs_info *de_info = autofs4_dentry_ino(dentry);
        int status = 0;
@@ -366,7 +370,7 @@ static int try_to_fill_dentry(struct dentry *dentry,
        /* We don't update the usages for the autofs daemon itself, this
           is necessary for recursive autofs mounts */
        if (!autofs4_oz_mode(sbi))
-                autofs4_update_usage(dentry);
+                autofs4_update_usage(mnt, dentry);
        spin_lock(&dentry->d_lock);
        dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
@@ -391,7 +395,7 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
        /* Pending dentry */
        if (autofs4_ispending(dentry)) {
                if (!oz_mode)
-                        status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags);
+                        status = try_to_fill_dentry(nd->mnt, dentry, flags);
                return status;
        }
@@ -408,14 +412,14 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
                         dentry, dentry->d_name.len, dentry->d_name.name);
                spin_unlock(&dcache_lock);
                if (!oz_mode)
-                        status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags);
+                        status = try_to_fill_dentry(nd->mnt, dentry, flags);
                return status;
        }
        spin_unlock(&dcache_lock);
        /* Update the usage list */
        if (!oz_mode)
-                autofs4_update_usage(dentry);
+                autofs4_update_usage(nd->mnt, dentry);
        return 1;
 }
@@ -488,9 +492,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
        d_add(dentry, NULL);
        if (dentry->d_op && dentry->d_op->d_revalidate) {
-                up(&dir->i_sem);
+                mutex_unlock(&dir->i_mutex);
                (dentry->d_op->d_revalidate)(dentry, nd);
-                down(&dir->i_sem);
+                mutex_lock(&dir->i_mutex);
        }
        /*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 72011826f0cb..f312103434d4 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -33,8 +33,6 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
-extern void dump_thread(struct pt_regs *, struct user *);
 static struct linux_binfmt aout_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_aout_binary,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f36f2210204f..1b117a441298 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
 #else
 #define elf_core_dump   NULL
@@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
                        struct elf_phdr *eppnt, int prot, int type)
 {
        unsigned long map_addr;
+        unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
        down_write(&current->mm->mmap_sem);
-        map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+        /* mmap() will return -EINVAL if given a zero size, but a
-                           eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
+         * segment with zero filesize is perfectly valid */
-                           eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+        if (eppnt->p_filesz + pageoffset)
+                map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+                                   eppnt->p_filesz + pageoffset, prot, type,
+                                   eppnt->p_offset - pageoffset);
+        else
+                map_addr = ELF_PAGESTART(addr);
        up_write(&current->mm->mmap_sem);
        return(map_addr);
 }
@@ -616,7 +622,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                                goto out_free_file;
                        retval = -ENOMEM;
-                        elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
+                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                           GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_file;
@@ -1107,7 +1113,7 @@ out:
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 /*
 * ELF core dumper
@@ -1212,7 +1218,7 @@ static int writenote(struct memelfnote *men, struct file *file)
        if (!dump_seek(file, (off))) \
                goto end_coredump;
-static inline void fill_elf_header(struct elfhdr *elf, int segs)
+static void fill_elf_header(struct elfhdr *elf, int segs)
 {
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
@@ -1237,7 +1243,7 @@ static inline void fill_elf_header(struct elfhdr *elf, int segs)
        return;
 }
-static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
+static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
 {
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
@@ -1628,17 +1634,17 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
        ELF_CORE_WRITE_EXTRA_DATA;
 #endif
-        if ((off_t) file->f_pos != offset) {
+        if ((off_t)file->f_pos != offset) {
                /* Sanity check */
-                printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
+                printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
-                       (off_t) file->f_pos, offset);
+                       (off_t)file->f_pos, offset);
        }
 end_coredump:
        set_fs(fs);
 cleanup:
-        while(!list_empty(&thread_list)) {
+        while (!list_empty(&thread_list)) {
                struct list_head *tmp = thread_list.next;
                list_del(tmp);
                kfree(list_entry(tmp, struct elf_thread_status, list));
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index e0344f69c79d..5b3076e8ee90 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -187,7 +187,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
                                goto error;
                        /* read the name of the interpreter into memory */
-                        interpreter_name = (char *) kmalloc(phdr->p_filesz, GFP_KERNEL);
+                        interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL);
                        if (!interpreter_name)
                                goto error;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9d6625829b99..108d56bbd0d0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -77,8 +77,6 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
 static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file);
-extern void dump_thread(struct pt_regs *, struct user *);
 static struct linux_binfmt flat_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_flat_binary,
@@ -444,19 +442,22 @@ static int load_flat_file(struct linux_binprm * bprm,
        flags     = ntohl(hdr->flags);
        rev       = ntohl(hdr->rev);
-        if (flags & FLAT_FLAG_KTRACE)
+        if (strncmp(hdr->magic, "bFLT", 4)) {
-                printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename);
-        if (strncmp(hdr->magic, "bFLT", 4) ||
-                        (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION)) {
                /*
                 * because a lot of people do not manage to produce good
                 * flat binaries,  we leave this printk to help them realise
                 * the problem.  We only print the error if its not a script file
                 */
                if (strncmp(hdr->magic, "#!", 2))
-                        printk("BINFMT_FLAT: bad magic/rev (0x%x, need 0x%x)\n",
+                        printk("BINFMT_FLAT: bad header magic\n");
-                                        rev, (int) FLAT_VERSION);
+                return -ENOEXEC;
+        }
+        if (flags & FLAT_FLAG_KTRACE)
+                printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename);
+        if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
+                printk("BINFMT_FLAT: bad flat file version 0x%x (supported 0x%x and 0x%x)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION);
                return -ENOEXEC;
        }
        
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 2568eb41cb3a..6a7b730c206b 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -264,7 +264,7 @@ static int unquote(char *from)
        return p - from;
 }
-static inline char * check_special_flags (char * sfs, Node * e)
+static char * check_special_flags (char * sfs, Node * e)
 {
        char * p = sfs;
        int cont = 1;
@@ -588,11 +588,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
                case 2: set_bit(Enabled, &e->flags);
                        break;
                case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
-                        down(&root->d_inode->i_sem);
+                        mutex_lock(&root->d_inode->i_mutex);
                        kill_node(e);
-                        up(&root->d_inode->i_sem);
+                        mutex_unlock(&root->d_inode->i_mutex);
                        dput(root);
                        break;
                default: return res;
@@ -622,7 +622,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                return PTR_ERR(e);
        root = dget(sb->s_root);
-        down(&root->d_inode->i_sem);
+        mutex_lock(&root->d_inode->i_mutex);
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -658,7 +658,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
        dput(dentry);
 out:
-        up(&root->d_inode->i_sem);
+        mutex_unlock(&root->d_inode->i_mutex);
        dput(root);
        if (err) {
@@ -703,12 +703,12 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
                case 1: enabled = 0; break;
                case 2: enabled = 1; break;
                case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
-                        down(&root->d_inode->i_sem);
+                        mutex_lock(&root->d_inode->i_mutex);
                        while (!list_empty(&entries))
                                kill_node(list_entry(entries.next, Node, list));
-                        up(&root->d_inode->i_sem);
+                        mutex_unlock(&root->d_inode->i_mutex);
                        dput(root);
                default: return res;
        }
diff --git a/fs/bio.c b/fs/bio.c
index dfe242a21eb4..bbc442b8c867 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -123,9 +123,10 @@ static void bio_fs_destructor(struct bio *bio)
        bio_free(bio, fs_bio_set);
 }
-inline void bio_init(struct bio *bio)
+void bio_init(struct bio *bio)
 {
        bio->bi_next = NULL;
+        bio->bi_bdev = NULL;
        bio->bi_flags = 1 << BIO_UPTODATE;
        bio->bi_rw = 0;
        bio->bi_vcnt = 0;
@@ -252,7 +253,7 @@ inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
 *      the actual data it points to. Reference count of returned
 *      bio will be one.
 */
-inline void __bio_clone(struct bio *bio, struct bio *bio_src)
+void __bio_clone(struct bio *bio, struct bio *bio_src)
 {
        request_queue_t *q = bdev_get_queue(bio_src->bi_bdev);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e0df94c37b7e..6e50346fb1ee 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -202,7 +202,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
        loff_t size;
        loff_t retval;
-        down(&bd_inode->i_sem);
+        mutex_lock(&bd_inode->i_mutex);
        size = i_size_read(bd_inode);
        switch (origin) {
@@ -219,7 +219,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
                }
                retval = offset;
        }
-        up(&bd_inode->i_sem);
+        mutex_unlock(&bd_inode->i_mutex);
        return retval;
 }
        
diff --git a/fs/buffer.c b/fs/buffer.c
index 5287be18633b..3dc712f29d2d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -26,6 +26,7 @@
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
+#include <linux/capability.h>
 #include <linux/blkdev.h>
 #include <linux/file.h>
 #include <linux/quotaops.h>
@@ -153,14 +154,8 @@ int sync_blockdev(struct block_device *bdev)
 {
        int ret = 0;
-        if (bdev) {
+        if (bdev)
-                int err;
+                ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-                ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
-                err = filemap_fdatawait(bdev->bd_inode->i_mapping);
-                if (!ret)
-                        ret = err;
-        }
        return ret;
 }
 EXPORT_SYMBOL(sync_blockdev);
@@ -358,11 +353,11 @@ static long do_fsync(unsigned int fd, int datasync)
         * We need to protect against concurrent writers,
         * which could cause livelocks in fsync_buffers_list
         */
-        down(&mapping->host->i_sem);
+        mutex_lock(&mapping->host->i_mutex);
        err = file->f_op->fsync(file, file->f_dentry, datasync);
        if (!ret)
                ret = err;
-        up(&mapping->host->i_sem);
+        mutex_unlock(&mapping->host->i_mutex);
        err = filemap_fdatawait(mapping);
        if (!ret)
                ret = err;
@@ -1032,7 +1027,7 @@ try_again:
                /* Link the buffer to its page */
                set_bh_page(bh, page, offset);
-                bh->b_end_io = NULL;
+                init_buffer(bh, NULL, NULL);
        }
        return head;
 /*
@@ -1170,7 +1165,7 @@ failed:
 * some of those buffers may be aliases of filesystem data.
 * grow_dev_page() will go BUG() if this happens.
 */
-static inline int
+static int
 grow_buffers(struct block_device *bdev, sector_t block, int size)
 {
        struct page *page;
@@ -1396,7 +1391,7 @@ static void bh_lru_install(struct buffer_head *bh)
 /*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
-static inline struct buffer_head *
+static struct buffer_head *
 lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
 {
        struct buffer_head *ret = NULL;
@@ -1546,7 +1541,7 @@ EXPORT_SYMBOL(set_bh_page);
 /*
 * Called when truncating a buffer on a page completely.
 */
-static inline void discard_buffer(struct buffer_head * bh)
+static void discard_buffer(struct buffer_head * bh)
 {
        lock_buffer(bh);
        clear_buffer_dirty(bh);
@@ -1768,7 +1763,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         * handle that here by just cleaning them.
         */
-        block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
        head = page_buffers(page);
        bh = head;
@@ -2160,11 +2155,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 * truncates.  Uses prepare/commit_write to allow the filesystem to
 * deal with the hole.  
 */
-int generic_cont_expand(struct inode *inode, loff_t size)
+static int __generic_cont_expand(struct inode *inode, loff_t size,
+                                 pgoff_t index, unsigned int offset)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-        unsigned long index, offset, limit;
+        unsigned long limit;
        int err;
        err = -EFBIG;
@@ -2176,24 +2172,24 @@ int generic_cont_expand(struct inode *inode, loff_t size)
        if (size > inode->i_sb->s_maxbytes)
                goto out;
-        offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-        /* ugh.  in prepare/commit_write, if from==to==start of block, we 
-        ** skip the prepare.  make sure we never send an offset for the start
-        ** of a block
-        */
-        if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-                offset++;
-        }
-        index = size >> PAGE_CACHE_SHIFT;
        err = -ENOMEM;
        page = grab_cache_page(mapping, index);
        if (!page)
                goto out;
        err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-        if (!err) {
+        if (err) {
-                err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+                /*
+                 * ->prepare_write() may have instantiated a few blocks
+                 * outside i_size.  Trim these off again.
+                 */
+                unlock_page(page);
+                page_cache_release(page);
+                vmtruncate(inode, inode->i_size);
+                goto out;
        }
+        err = mapping->a_ops->commit_write(NULL, page, offset, offset);
        unlock_page(page);
        page_cache_release(page);
        if (err > 0)
@@ -2202,6 +2198,36 @@ out:
        return err;
 }
+/*
+ * generic_cont_expand - work out where the new end of file lies and let
+ * __generic_cont_expand() do the prepare_write/commit_write there.
+ *
+ * @index is the page containing byte @size and @offset the position of
+ * @size within that page.  If that position is the start of a filesystem
+ * block it is bumped by one, so the offset handed on is then one byte past
+ * @size.
+ */
+int generic_cont_expand(struct inode *inode, loff_t size)
+{
+        pgoff_t index;
+        unsigned int offset;
+        offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
+        /* ugh.  in prepare/commit_write, if from==to==start of block, we
+        ** skip the prepare.  make sure we never send an offset for the start
+        ** of a block
+        */
+        if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
+                /* caller must handle this extra byte. */
+                offset++;
+        }
+        index = size >> PAGE_CACHE_SHIFT;
+        return __generic_cont_expand(inode, size, index, offset);
+}
+/*
+ * generic_cont_expand_simple - variant of generic_cont_expand() that never
+ * adds the extra byte.
+ *
+ * Works from the last byte of the new size (@size - 1): index is the page
+ * holding that byte and offset is its in-page position plus one.  A @size
+ * that is an exact multiple of PAGE_CACHE_SIZE therefore gives
+ * offset == PAGE_CACHE_SIZE on the preceding page rather than offset 0 on
+ * the following one.
+ *
+ * NOTE(review): @size == 0 makes pos negative; presumably callers only pass
+ * a size larger than the current i_size -- confirm.
+ */
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
+{
+        loff_t pos = size - 1;
+        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+        unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
+        /* prepare/commit_write can handle even if from==to==start of block. */
+        return __generic_cont_expand(inode, size, index, offset);
+}
 /*
 * For moronic filesystems that do not allow holes in file.
 * We may have to extend the file.
@@ -2313,7 +2339,7 @@ int generic_commit_write(struct file *file, struct page *page,
        __block_commit_write(inode,page,from,to);
        /*
         * No need to use i_size_read() here, the i_size
-         * cannot change under us because we hold i_sem.
+         * cannot change under us because we hold i_mutex.
         */
        if (pos > inode->i_size) {
                i_size_write(inode, pos);
@@ -2610,7 +2636,7 @@ int block_truncate_page(struct address_space *mapping,
        pgoff_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
        unsigned blocksize;
-        pgoff_t iblock;
+        sector_t iblock;
        unsigned length, pos;
        struct inode *inode = mapping->host;
        struct page *page;
@@ -2626,7 +2652,7 @@ int block_truncate_page(struct address_space *mapping,
                return 0;
        length = blocksize - length;
-        iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+        iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
        
        page = grab_cache_page(mapping, index);
        err = -ENOMEM;
@@ -3145,6 +3171,7 @@ EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
 EXPORT_SYMBOL(generic_cont_expand);
+EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
 EXPORT_SYMBOL(ll_rw_block);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3b1b1eefdbb0..21195c481637 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -35,7 +35,7 @@ static struct char_device_struct {
        unsigned int major;
        unsigned int baseminor;
        int minorct;
-        const char *name;
+        char name[64];
        struct file_operations *fops;
        struct cdev *cdev;              /* will die */
 } *chrdevs[MAX_PROBE_HASH];
@@ -46,34 +46,84 @@ static inline int major_to_index(int major)
        return major % MAX_PROBE_HASH;
 }
-/* get char device names in somewhat random order */
+/*
+ * Cursor for walking chrdevs[]: index is the hash bucket being visited and
+ * cd the current entry on that bucket's chain.  get_next_chrdev() leaves cd
+ * NULL once it has run past the last bucket.
+ */
+struct chrdev_info {
-int get_chrdev_list(char *page)
+        int index;
-{
        struct char_device_struct *cd;
-        int i, len;
+};
-        len = sprintf(page, "Character devices:\n");
+/*
+ * get_next_chrdev - start or advance a walk over the chrdevs[] hash table.
+ * @dev: NULL to start a new walk, otherwise the cursor returned by an
+ *       earlier call
+ *
+ * With @dev == NULL a struct chrdev_info is kmalloc()ed and positioned on
+ * the first device found, scanning buckets from index 0.  Otherwise the
+ * cursor moves to the next entry: first along the current bucket's chain,
+ * then on to the following buckets.
+ *
+ * Returns the cursor, whose ->cd is NULL once every bucket has been
+ * visited, or NULL if the initial allocation failed.  No lock is taken
+ * here.
+ */
+void *get_next_chrdev(void *dev)
+{
+        struct chrdev_info *info;
+        if (dev == NULL) {
+                info = kmalloc(sizeof(*info), GFP_KERNEL);
+                if (!info)
+                        goto out;
+                info->index=0;
+                info->cd = chrdevs[info->index];
+                /* bucket 0 is populated: its head is the first result */
+                if (info->cd)
+                        goto out;
+        } else {
+                info = dev;
+        }
+        while (info->index < ARRAY_SIZE(chrdevs)) {
+                /* step along the current chain, if we are on one */
+                if (info->cd)
+                        info->cd = info->cd->next;
+                if (info->cd)
+                        goto out;
+                /*
+                 * No devices on this chain, move to the next
+                 */
+                info->index++;
+                info->cd = (info->index < ARRAY_SIZE(chrdevs)) ?
+                        chrdevs[info->index] : NULL;
+                if (info->cd)
+                        goto out;
+        }
+out:
+        return info;
+}
+/*
+ * acquire_chrdev_list - take chrdevs_lock and start a walk of chrdevs[].
+ *
+ * Returns whatever get_next_chrdev(NULL) returns, which is NULL if the
+ * cursor could not be allocated.  chrdevs_lock is held on return in either
+ * case; release_chrdev_list() is the function that drops it.
+ */
+void *acquire_chrdev_list(void)
+{
        down(&chrdevs_lock);
+        return get_next_chrdev(NULL);
+}
+/*
+ * release_chrdev_list - finish a walk: drop chrdevs_lock, then free the
+ * cursor.
+ * @dev: cursor to free; it is passed straight to kfree()
+ */
+void release_chrdev_list(void *dev)
+{
+        up(&chrdevs_lock);
+        kfree(dev);
+}
+/*
+ * count_chrdev_list - count the entries currently linked into chrdevs[].
+ *
+ * Visits every hash bucket and follows each chain to its end.  No lock is
+ * taken here; presumably the caller already holds chrdevs_lock -- confirm.
+ */
+int count_chrdev_list(void)
+{
+        struct char_device_struct *cd;
+        int i, count;
+        count = 0;
        for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-                for (cd = chrdevs[i]; cd; cd = cd->next) {
+                for (cd = chrdevs[i]; cd; cd = cd->next)
-                        /*
+                        count++;
-                         * if the current name, plus the 5 extra characters
-                         * in the device line for this entry
-                         * would run us off the page, we're done
-                         */
-                        if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
-                                goto page_full;
-                        len += sprintf(page+len, "%3d %s\n",
-                                       cd->major, cd->name);
-                }
        }
-page_full:
-        up(&chrdevs_lock);
-        return len;
+        return count;
+}
+/*
+ * get_chrdev_info - read the device a walk cursor currently points at.
+ * @dev: cursor from acquire_chrdev_list()/get_next_chrdev(); may be NULL
+ * @major: set to the device's major number
+ * @name: set to point at the name stored inside the char_device_struct
+ *        (the string is not copied)
+ *
+ * Returns 0 when @major and @name were filled in, and 1 when there is no
+ * current device: either the walk has finished, or @dev is NULL because
+ * get_next_chrdev() could not allocate the cursor.
+ */
+int get_chrdev_info(void *dev, int *major, char **name)
+{
+        struct chrdev_info *info = dev;
+        /* a failed cursor allocation reaches us as a NULL @dev */
+        if (info == NULL || info->cd == NULL)
+                return 1;
+        *major = info->cd->major;
+        *name = info->cd->name;
+        return 0;
 }
 /*
@@ -121,7 +171,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
        cd->major = major;
        cd->baseminor = baseminor;
        cd->minorct = minorct;
-        cd->name = name;
+        strncpy(cd->name,name, 64);
        i = major_to_index(major);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 943ef9b82244..d335015473a5 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
+Version 1.40
+------------
+Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
+of readpages by eliminating one extra memcpy. Allow update of file size
+from remote server even if file is open for write as long as mount is
+directio.  Recognize share mode security and send NTLM encrypted password
+on tree connect if share mode negotiated.
 Version 1.39
 ------------
 Defer close of a file handle slightly if pending writes depend on that handle
@@ -7,6 +15,8 @@ Fix SFU style symlinks and mknod needed for servers which do not support the
 CIFS Unix Extensions.  Fix setfacl/getfacl on bigendian. Timeout negative
 dentries so files that the client sees as deleted but that later get created
 on the server will be recognized.  Add client side permission check on setattr.
+Time out stuck requests better (where the server has never responded or has
+sent corrupt responses).
 Version 1.38
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index e5d09a2fc7a5..b0070d1b149d 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -436,7 +436,17 @@ A partial list of the supported mount options follows:
                SFU does).  In the future the bottom 9 bits of the mode
                mode also will be emulated using queries of the security
                descriptor (ACL).
-                
+sec             Security mode.  Allowed values are:
+                        none    attempt to connect as a null user (no name)
+                        krb5    Use Kerberos version 5 authentication
+                        krb5i   Use Kerberos authentication and packet signing
+                        ntlm    Use NTLM password hashing (default)
+                        ntlmi   Use NTLM password hashing with signing (can
+                                also be the default if
+                                /proc/fs/cifs/PacketSigningEnabled is on or
+                                if the server requires signing)
+                        ntlmv2  Use NTLMv2 password hashing      
+                        ntlmv2i Use NTLMv2 password hashing with packet signing
 The mount.cifs mount helper also accepts a few mount options before -o
 including:
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 22a444a3fe4c..f4124a32bef8 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -219,6 +219,10 @@ cifs_stats_write(struct file *file, const char __user *buffer,
        if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
                read_lock(&GlobalSMBSeslock);
+#ifdef CONFIG_CIFS_STATS2
+                atomic_set(&totBufAllocCount, 0);
+                atomic_set(&totSmBufAllocCount, 0);
+#endif /* CONFIG_CIFS_STATS2 */
                list_for_each(tmp, &GlobalTreeConnectionList) {
                        tcon = list_entry(tmp, struct cifsTconInfo,
                                        cifsConnectionList);
@@ -276,6 +280,14 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
                        smBufAllocCount.counter,cifs_min_small);
        length += item_length;
        buf += item_length;
+#ifdef CONFIG_CIFS_STATS2
+        item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
+                                atomic_read(&totBufAllocCount),
+                                atomic_read(&totSmBufAllocCount));
+        length += item_length;
+        buf += item_length;
+#endif /* CONFIG_CIFS_STATS2 */
        item_length = 
                sprintf(buf,"Operations (MIDs): %d\n",
                        midCount.counter);
@@ -389,8 +401,8 @@ static read_proc_t ntlmv2_enabled_read;
 static write_proc_t ntlmv2_enabled_write;
 static read_proc_t packet_signing_enabled_read;
 static write_proc_t packet_signing_enabled_write;
-static read_proc_t quotaEnabled_read;
+static read_proc_t experimEnabled_read;
-static write_proc_t quotaEnabled_write;
+static write_proc_t experimEnabled_write;
 static read_proc_t linuxExtensionsEnabled_read;
 static write_proc_t linuxExtensionsEnabled_write;
@@ -430,9 +442,9 @@ cifs_proc_init(void)
                pde->write_proc = oplockEnabled_write;
        pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
-                                quotaEnabled_read, NULL);
+                                experimEnabled_read, NULL);
        if (pde)
-                pde->write_proc = quotaEnabled_write;
+                pde->write_proc = experimEnabled_write;
        pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
                                linuxExtensionsEnabled_read, NULL);
@@ -574,14 +586,13 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
 }
 static int
-quotaEnabled_read(char *page, char **start, off_t off,
+experimEnabled_read(char *page, char **start, off_t off,
                   int count, int *eof, void *data)
 {
        int len;
        len = sprintf(page, "%d\n", experimEnabled);
-/* could also check if quotas are enabled in kernel
-        as a whole first */
        len -= off;
        *start = page + off;
@@ -596,21 +607,23 @@ quotaEnabled_read(char *page, char **start, off_t off,
        return len;
 }
+/*
+ * /proc write handler for the "Experimental" entry.
+ *
+ * Only the first byte of the user buffer is looked at: '0', 'n' or 'N'
+ * sets experimEnabled to 0; '1', 'y' or 'Y' sets it to 1; '2' sets it to 2.
+ * Any other character leaves the setting unchanged.
+ *
+ * Returns the get_user() error if the byte cannot be read, otherwise
+ * @count, so the whole write is reported as consumed.
+ */
 static int
-quotaEnabled_write(struct file *file, const char __user *buffer,
+experimEnabled_write(struct file *file, const char __user *buffer,
                     unsigned long count, void *data)
 {
-        char c;
+        char c;
-        int rc;
+        int rc;
-        rc = get_user(c, buffer);
+        rc = get_user(c, buffer);
-        if (rc)
+        if (rc)
-                return rc;
+                return rc;
-        if (c == '0' || c == 'n' || c == 'N')
+        if (c == '0' || c == 'n' || c == 'N')
-                experimEnabled = 0;
+                experimEnabled = 0;
-        else if (c == '1' || c == 'y' || c == 'Y')
+        else if (c == '1' || c == 'y' || c == 'Y')
-                experimEnabled = 1;
+                experimEnabled = 1;
+        else if (c == '2')
+                experimEnabled = 2;
-        return count;
+        return count;
 }
 static int
@@ -620,8 +633,6 @@ linuxExtensionsEnabled_read(char *page, char **start, off_t off,
        int len;
        len = sprintf(page, "%d\n", linuxExtEnabled);
-/* could also check if quotas are enabled in kernel
-        as a whole first */
        len -= off;
        *start = page + off;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index f799f6f0e729..ad58eb0c4d6d 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -24,9 +24,10 @@
 #define CIFS_MOUNT_DIRECT_IO    8 /* do not write nor read through page cache */
 #define CIFS_MOUNT_NO_XATTR  0x10 /* if set - disable xattr support */
 #define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
-#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible. */
+#define CIFS_MOUNT_POSIX_PATHS  0x40 /* Negotiate posix pathnames if possible. */
-#define CIFS_MOUNT_UNX_EMUL    0x80 /* Network compat with SFUnix emulation */
+#define CIFS_MOUNT_UNX_EMUL     0x80 /* Network compat with SFUnix emulation */
 #define CIFS_MOUNT_NO_BRL       0x100 /* No sending byte range locks to srv */
+#define CIFS_MOUNT_CIFS_ACL     0x200 /* send ACL requests to non-POSIX srv */
 struct cifs_sb_info {
        struct cifsTconInfo *tcon;      /* primary mount */
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h
index decd138f14d4..da2ad5b451ac 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/cifs/cifs_uniupr.h
@@ -242,7 +242,7 @@ static signed char UniCaseRangeLff20[27] = {
 /*
 * Lower Case Range
 */
-const static struct UniCaseRange CifsUniLowerRange[] = {
+static const struct UniCaseRange CifsUniLowerRange[] = {
        0x0380, 0x03ab, UniCaseRangeL0380,
        0x0400, 0x042f, UniCaseRangeL0400,
        0x0490, 0x04cb, UniCaseRangeL0490,
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
new file mode 100644
index 000000000000..d0776ac2b804
--- /dev/null
+++ b/fs/cifs/cifsacl.h
@@ -0,0 +1,38 @@
+/*
+ *   fs/cifs/cifsacl.h
+ *
+ *   Copyright (c) International Business Machines  Corp., 2005
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _CIFSACL_H
+#define _CIFSACL_H
+/*
+ * Security identifier.  The layout is packed (no padding between fields)
+ * and has room for four sub-authorities.
+ * NOTE(review): presumably mirrors the SID format used on the wire --
+ * confirm, including the byte order expected in sub_auth[].
+ */
+struct cifs_sid {
+        __u8 revision; /* revision level */
+        __u8 num_subauths;
+        __u8 authority[6];
+        __u32 sub_auth[4];
+        /* next sub_auth if any ... */
+} __attribute__((packed));
+/* everyone */
+extern const struct cifs_sid sid_everyone;
+/* group users */
+extern const struct cifs_sid sid_user;
+#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index fe2bb7c4c912..a2c24858d40f 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,7 +1,7 @@
 /*
 *   fs/cifs/cifsencrypt.c
 *
- *   Copyright (C) International Business Machines  Corp., 2003
+ *   Copyright (C) International Business Machines  Corp., 2005
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
 *   This library is free software; you can redistribute it and/or modify
@@ -82,6 +82,59 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server,
        return rc;
 }
+/*
+ * Calculate the MD5 digest used to sign an SMB that is described by an
+ * array of kvecs rather than by one contiguous buffer.
+ *
+ * iov       - request fragments; iov[0] starts with the 4 byte RFC1001
+ *             length field, which is not part of the signed data
+ * n_vec     - number of entries in iov
+ * key       - signing key; CIFS_SESSION_KEY_SIZE+16 bytes of it are hashed
+ * signature - receives the MD5 digest (16 bytes)
+ *
+ * Returns 0 on success, -EINVAL when an argument is missing and -EIO when
+ * a fragment is unusable (non-empty entry with a NULL base, or an iov[0]
+ * too short to hold anything beyond the length field).
+ */
+static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
+                                const char * key, char * signature)
+{
+        struct  MD5Context context;
+        int i;
+        if((iov == NULL) || (key == NULL) || (signature == NULL) ||
+           (n_vec < 1))
+                return -EINVAL;
+        MD5Init(&context);
+        MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
+        for(i = 0; i < n_vec; i++) {
+                const char * base = iov[i].iov_base;
+                size_t len = iov[i].iov_len;
+                if(i == 0) {
+                        /* the first fragment must carry more than just the
+                           length field, which itself is never signed */
+                        if((base == NULL) || (len <= 4))
+                                return -EIO;
+                        base += 4;
+                        len -= 4;
+                } else if(len == 0) {
+                        continue; /* empty fragment adds nothing to the hash */
+                } else if(base == NULL) {
+                        return -EIO;
+                }
+                MD5Update(&context, base, len);
+        }
+        MD5Final(signature,&context);
+        return 0;
+}
+/*
+ * Sign an SMB request that is held in an array of kvecs.
+ *
+ * iov[0].iov_base must point at the SMB header.  When the header does not
+ * have SMBFLG2_SECURITY_SIGNATURE set nothing is modified and 0 is
+ * returned.  Otherwise the current server sequence number is written into
+ * the signature field, the same number is stored through
+ * pexpected_response_sequence_number, and the server counter is advanced
+ * by two - all under GlobalMid_Lock.  The digest is then computed by
+ * cifs_calc_signature2() and its first 8 bytes replace the signature
+ * field; if that calculation fails the field is zeroed and the error is
+ * returned.
+ *
+ * Returns 0 on success, -EINVAL for a missing iov, header or server, or
+ * the error from cifs_calc_signature2().
+ */
+int cifs_sign_smb2(struct kvec * iov, int n_vec, struct TCP_Server_Info *server,
+                   __u32 * pexpected_response_sequence_number)
+{
+        int rc = 0;
+        char smb_signature[20];
+        struct smb_hdr * cifs_pdu;
+        /* check iov itself before looking inside its first entry */
+        if((iov == NULL) || (server == NULL))
+                return -EINVAL;
+        cifs_pdu = iov[0].iov_base;
+        if(cifs_pdu == NULL)
+                return -EINVAL;
+        if((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
+                return rc;
+        spin_lock(&GlobalMid_Lock);
+        cifs_pdu->Signature.Sequence.SequenceNumber =
+                                cpu_to_le32(server->sequence_number);
+        cifs_pdu->Signature.Sequence.Reserved = 0;
+        /* the counter moves on by two per signed request; presumably the
+           second number is the one consumed by the response - confirm */
+        *pexpected_response_sequence_number = server->sequence_number++;
+        server->sequence_number++;
+        spin_unlock(&GlobalMid_Lock);
+        rc = cifs_calc_signature2(iov, n_vec, server->mac_signing_key,
+                                      smb_signature);
+        if(rc)
+                memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
+        else
+                memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8);
+        return rc;
+}
 int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key,
        __u32 expected_sequence_number)
 {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2a13a2bac8f1..79eeccd0437f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -513,6 +513,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
        return written;
 }
+/*
+ * llseek for cifs files.  A seek relative to the end of the file
+ * (origin 2, i.e. SEEK_END) needs an up to date file length, so the
+ * dentry is revalidated first; a negative result from the revalidation
+ * is returned as the seek result.  All seeks are then handed to
+ * remote_llseek().
+ */
+static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
+{
+        int rc;
+        /* only SEEK_END depends on the cached file length */
+        if (origin != 2)
+                return remote_llseek(file, offset, origin);
+        rc = cifs_revalidate(file->f_dentry);
+        if (rc < 0)
+                return (loff_t)rc;
+        return remote_llseek(file, offset, origin);
+}
 static struct file_system_type cifs_fs_type = {
        .owner = THIS_MODULE,
        .name = "cifs",
@@ -586,6 +597,7 @@ struct file_operations cifs_file_ops = {
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
        .sendfile = generic_file_sendfile,
+        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -609,7 +621,7 @@ struct file_operations cifs_file_direct_ops = {
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
+        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
        .dir_notify = cifs_dir_notify,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
@@ -627,6 +639,7 @@ struct file_operations cifs_file_nobrl_ops = {
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
        .sendfile = generic_file_sendfile,
+        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -649,7 +662,7 @@ struct file_operations cifs_file_direct_nobrl_ops = {
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
+        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
        .dir_notify = cifs_dir_notify,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
@@ -733,7 +746,7 @@ cifs_init_request_bufs(void)
                kmem_cache_destroy(cifs_req_cachep);
                return -ENOMEM;
        }
-        /* 256 (MAX_CIFS_HDR_SIZE bytes is enough for most SMB responses and
+        /* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and
        almost all handle based requests (but not write response, nor is it
        sufficient for path based requests).  A smaller size would have
        been more efficient (compacting multiple slab items on one 4k page) 
@@ -742,7 +755,8 @@ cifs_init_request_bufs(void)
        efficient to alloc 1 per page off the slab compared to 17K (5page) 
        alloc of large cifs buffers even when page debugging is on */
        cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
-                        MAX_CIFS_HDR_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+                        MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, 
+                        NULL, NULL);
        if (cifs_sm_req_cachep == NULL) {
                mempool_destroy(cifs_req_poolp);
                kmem_cache_destroy(cifs_req_cachep);
@@ -860,9 +874,9 @@ static int cifs_oplock_thread(void * dummyarg)
                                DeleteOplockQEntry(oplock_item);
                                /* can not grab inode sem here since it would
                                deadlock when oplock received on delete 
-                                since vfs_unlink holds the i_sem across
+                                since vfs_unlink holds the i_mutex across
                                the call */
-                                /* down(&inode->i_sem);*/
+                                /* mutex_lock(&inode->i_mutex);*/
                                if (S_ISREG(inode->i_mode)) {
                                        rc = filemap_fdatawrite(inode->i_mapping);
                                        if(CIFS_I(inode)->clientCanCacheRead == 0) {
@@ -871,7 +885,7 @@ static int cifs_oplock_thread(void * dummyarg)
                                        }
                                } else
                                        rc = 0;
-                                /* up(&inode->i_sem);*/
+                                /* mutex_unlock(&inode->i_mutex);*/
                                if (rc)
                                        CIFS_I(inode)->write_behind_rc = rc;
                                cFYI(1,("Oplock flush inode %p rc %d",inode,rc));
@@ -954,6 +968,12 @@ init_cifs(void)
        atomic_set(&tconInfoReconnectCount, 0);
        atomic_set(&bufAllocCount, 0);
+        atomic_set(&smBufAllocCount, 0);
+#ifdef CONFIG_CIFS_STATS2
+        atomic_set(&totBufAllocCount, 0);
+        atomic_set(&totSmBufAllocCount, 0);
+#endif /* CONFIG_CIFS_STATS2 */
        atomic_set(&midCount, 0);
        GlobalCurrentXid = 0;
        GlobalTotalActiveXid = 0;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9ec40e0e54fc..821a8eb22559 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -99,5 +99,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t  cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
                       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.39"
+#define CIFS_VERSION   "1.40"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1ba08f8c5bc4..7bed27601ce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -233,6 +233,8 @@ struct cifsTconInfo {
        atomic_t num_hardlinks;
        atomic_t num_symlinks;
        atomic_t num_locks;
+        atomic_t num_acl_get;
+        atomic_t num_acl_set;
 #ifdef CONFIG_CIFS_STATS2
        unsigned long long time_writes;
        unsigned long long time_reads;
@@ -285,6 +287,7 @@ struct cifs_search_info {
        unsigned endOfSearch:1;
        unsigned emptyDir:1;
        unsigned unicode:1;
+        unsigned smallBuf:1; /* so we know which buf_release function to call */
 };
 struct cifsFileInfo {
@@ -420,7 +423,12 @@ struct dir_notify_req {
 #define   MID_RESPONSE_RECEIVED 4
 #define   MID_RETRY_NEEDED      8 /* session closed while this request out */
 #define   MID_NO_RESP_NEEDED 0x10
-#define   MID_SMALL_BUFFER   0x20 /* 112 byte response buffer instead of 4K */
+/* Types of response buffer returned from SendReceive2 */
+#define   CIFS_NO_BUFFER        0    /* Response buffer not returned */
+#define   CIFS_SMALL_BUFFER     1
+#define   CIFS_LARGE_BUFFER     2
+#define   CIFS_IOVEC            4    /* array of response buffers */
 /*
 *****************************************************************
@@ -505,8 +513,12 @@ GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
 GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
 /* Various Debug counters to remove someday (BB) */
-GLOBAL_EXTERN atomic_t bufAllocCount;
+GLOBAL_EXTERN atomic_t bufAllocCount;    /* current number allocated  */
-GLOBAL_EXTERN atomic_t smBufAllocCount;      
+#ifdef CONFIG_CIFS_STATS2
+GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
+GLOBAL_EXTERN atomic_t totSmBufAllocCount;
+#endif
+GLOBAL_EXTERN atomic_t smBufAllocCount;
 GLOBAL_EXTERN atomic_t midCount;
 /* Misc globals */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33e1859fd2f6..cc2471094ca5 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
 /*
 *   fs/cifs/cifspdu.h
 *
- *   Copyright (c) International Business Machines  Corp., 2002
+ *   Copyright (c) International Business Machines  Corp., 2002,2005
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
 *   This library is free software; you can redistribute it and/or modify
@@ -80,7 +80,11 @@
 #define NT_TRANSACT_GET_USER_QUOTA    0x07
 #define NT_TRANSACT_SET_USER_QUOTA    0x08
-#define MAX_CIFS_HDR_SIZE 256   /* is future chained NTCreateXReadX bigger? */
+#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
+/* future chained NTCreateXReadX bigger, but for time being NTCreateX biggest */
+/* among the requests (NTCreateX response is bigger with wct of 34) */
+#define MAX_CIFS_HDR_SIZE 0x58 /* 4 len + 32 hdr + (2*24 wct) + 2 bct + 2 pad */
+#define CIFS_SMALL_PATH 120 /* allows for (448-88)/3 */
 /* internal cifs vfs structures */
 /*****************************************************************
@@ -524,7 +528,7 @@ typedef union smb_com_session_setup_andx {
                /* STRING PrimaryDomain */
                /* STRING NativeOS */
                /* STRING NativeLanMan */
-        } __attribute__((packed)) old_req;              /* pre-NTLM (LANMAN2.1) request format */
+        } __attribute__((packed)) old_req; /* pre-NTLM (LANMAN2.1) req format */
        struct {                /* default (NTLM) response format */
                struct smb_hdr hdr;     /* wct = 3 */
@@ -536,7 +540,7 @@ typedef union smb_com_session_setup_andx {
                unsigned char NativeOS[1];      /* followed by */
 /*      unsigned char * NativeLanMan; */
 /*      unsigned char * PrimaryDomain; */
-        } __attribute__((packed)) old_resp;             /* pre-NTLM (LANMAN2.1) response format */
+        } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */
 } __attribute__((packed)) SESSION_SETUP_ANDX;
 #define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
@@ -1003,10 +1007,49 @@ typedef struct smb_com_setattr_rsp {
 /* empty wct response to setattr */
-/***************************************************/
+/*******************************************************/
-/* NT Transact structure defintions follow         */
+/* NT Transact structure definitions follow            */
-/* Currently only ioctl and notify are implemented */
+/* Currently only ioctl, acl (get security descriptor) */  
-/***************************************************/
+/* and notify are implemented                          */
+/*******************************************************/
+typedef struct smb_com_ntransact_req { /* generic NT Transact request layout; packed, so fields sit back to back */
+        struct smb_hdr hdr; /* wct >= 19 */
+        __u8 MaxSetupCount;
+        __u16 Reserved;
+        __le32 TotalParameterCount;
+        __le32 TotalDataCount;
+        __le32 MaxParameterCount;
+        __le32 MaxDataCount;
+        __le32 ParameterCount;
+        __le32 ParameterOffset;
+        __le32 DataCount;
+        __le32 DataOffset;
+        __u8 SetupCount; /* four setup words follow subcommand */ /* NOTE(review): "wct >= 19" above suggests the count varies per subcommand - confirm */
+        /* SNIA spec incorrectly included spurious pad here */
+        __le16 SubCommand; /* 2 = IOCTL/FSCTL */ /* presumably other NT_TRANSACT_* codes are valid here too - confirm */
+        /* SetupCount words follow then */ 
+        __le16 ByteCount;
+        __u8 Pad[3];
+        __u8 Parms[0]; /* zero length array: occupies no space, only names the offset just past Pad */
+} __attribute__((packed)) NTRANSACT_REQ;
+typedef struct smb_com_ntransact_rsp { /* generic NT Transact response layout; packed, so fields sit back to back */
+        struct smb_hdr hdr;     /* wct = 18 */
+        __u8 Reserved[3];
+        __le32 TotalParameterCount;
+        __le32 TotalDataCount;
+        __le32 ParameterCount;
+        __le32 ParameterOffset;
+        __le32 ParameterDisplacement;
+        __le32 DataCount;
+        __le32 DataOffset;
+        __le32 DataDisplacement;
+        __u8 SetupCount;   /* 0 */
+        __u16 ByteCount; /* NOTE(review): plain __u16 here while smb_com_ntransact_req uses __le16 - confirm whether this should be little endian annotated */
+        /* __u8 Pad[3]; */
+        /* parms and data follow */
+} __attribute__((packed)) NTRANSACT_RSP;
 typedef struct smb_com_transaction_ioctl_req {
        struct smb_hdr hdr;     /* wct = 23 */
        __u8 MaxSetupCount;
@@ -1021,11 +1064,11 @@ typedef struct smb_com_transaction_ioctl_req {
        __le32 DataOffset;
        __u8 SetupCount; /* four setup words follow subcommand */
        /* SNIA spec incorrectly included spurious pad here */
-        __le16 SubCommand;/* 2 = IOCTL/FSCTL */
+        __le16 SubCommand; /* 2 = IOCTL/FSCTL */
        __le32 FunctionCode;
        __u16 Fid;
-        __u8 IsFsctl;    /* 1 = File System Control, 0 = device control (IOCTL)*/
+        __u8 IsFsctl;  /* 1 = File System Control 0 = device control (IOCTL) */
-        __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS share)*/
+        __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */
        __le16 ByteCount;
        __u8 Pad[3];
        __u8 Data[1];
@@ -1045,9 +1088,35 @@ typedef struct smb_com_transaction_ioctl_rsp {
        __u8 SetupCount;        /* 1 */
        __le16 ReturnedDataLen;
        __u16 ByteCount;
-        __u8 Pad[3];
 } __attribute__((packed)) TRANSACT_IOCTL_RSP;
+#define CIFS_ACL_OWNER 1
+#define CIFS_ACL_GROUP 2
+#define CIFS_ACL_DACL  4
+#define CIFS_ACL_SACL  8
+typedef struct smb_com_transaction_qsec_req { /* NT Transact request for subcommand 6 (query security descriptor); packed */
+        struct smb_hdr hdr;     /* wct = 19 */
+        __u8 MaxSetupCount;
+        __u16 Reserved;
+        __le32 TotalParameterCount;
+        __le32 TotalDataCount;
+        __le32 MaxParameterCount;
+        __le32 MaxDataCount;
+        __le32 ParameterCount;
+        __le32 ParameterOffset;
+        __le32 DataCount;
+        __le32 DataOffset;
+        __u8 SetupCount; /* no setup words follow subcommand */
+        /* SNIA spec incorrectly included spurious pad here */
+        __le16 SubCommand; /* 6 = QUERY_SECURITY_DESC */
+        __le16 ByteCount; /* bcc = 3 + 8 */ /* i.e. the 3 Pad bytes plus Fid (2) + Reserved2 (2) + AclFlags (4) */
+        __u8 Pad[3];
+        __u16 Fid; /* NOTE(review): no endian annotation on this field - confirm */
+        __u16 Reserved2;
+        __le32 AclFlags; /* presumably a mask of the CIFS_ACL_* bits - confirm */
+} __attribute__((packed)) QUERY_SEC_DESC_REQ;
 typedef struct smb_com_transaction_change_notify_req {
        struct smb_hdr hdr;     /* wct = 23 */
        __u8 MaxSetupCount;
@@ -1068,10 +1137,12 @@ typedef struct smb_com_transaction_change_notify_req {
        __u8 WatchTree;  /* 1 = Monitor subdirectories */
        __u8 Reserved2;
        __le16 ByteCount;
-/* __u8 Pad[3];*/
+/*      __u8 Pad[3];*/
 /*      __u8 Data[1];*/
 } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ;
+/* BB eventually change to use generic ntransact rsp struct 
+      and validation routine */
 typedef struct smb_com_transaction_change_notify_rsp {
        struct smb_hdr hdr;     /* wct = 18 */
        __u8 Reserved[3];
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1b73f4f4c5ce..3c03aadaff0c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -48,8 +48,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
                        struct smb_hdr * /* out */ ,
                        int * /* bytes returned */ , const int long_op);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
-                        struct kvec *, int /* nvec */,
+                        struct kvec *, int /* nvec to send */, 
-                        int * /* bytes returned */ , const int long_op);
+                        int * /* type of buf returned */ , const int long_op);
 extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb);
@@ -93,11 +93,12 @@ extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                        const struct nls_table *);
 extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
-            const char *searchName, const struct nls_table *nls_codepage,
+                const char *searchName, const struct nls_table *nls_codepage,
-            __u16 *searchHandle, struct cifs_search_info * psrch_inf, int map, const char dirsep);
+                __u16 *searchHandle, struct cifs_search_info * psrch_inf, 
+                int map, const char dirsep);
 extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
-            __u16 searchHandle, struct cifs_search_info * psrch_inf);
+                __u16 searchHandle, struct cifs_search_info * psrch_inf);
 extern int CIFSFindClose(const int, struct cifsTconInfo *tcon,
                        const __u16 search_handle);
@@ -230,19 +231,18 @@ extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
                        const int smb_file_id);
 extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
-                        const int netfid, unsigned int count,
+                        const int netfid, unsigned int count,
-                        const __u64 lseek, unsigned int *nbytes, char **buf);
+                        const __u64 lseek, unsigned int *nbytes, char **buf,
+                        int * return_buf_type);
 extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
                        const int netfid, const unsigned int count,
                        const __u64 lseek, unsigned int *nbytes,
                        const char *buf, const char __user *ubuf, 
                        const int long_op);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
                        const int netfid, const unsigned int count,
                        const __u64 offset, unsigned int *nbytes, 
                        struct kvec *iov, const int nvec, const int long_op);
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
 extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
                        const unsigned char *searchName, __u64 * inode_number,
                        const struct nls_table *nls_codepage, 
@@ -269,6 +269,8 @@ extern void tconInfoFree(struct cifsTconInfo *);
 extern int cifs_reconnect(struct TCP_Server_Info *server);
 extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *);
+extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
+                          __u32 *);
 extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key,
        __u32 expected_sequence_number);
 extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass);
@@ -297,6 +299,9 @@ extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon,
                const char *fileName, const char * ea_name, 
                const void * ea_value, const __u16 ea_value_len, 
                const struct nls_table *nls_codepage, int remap_special_chars);
+extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon,
+                        __u16 fid, char *acl_inf, const int buflen,
+                        const int acl_type /* ACCESS vs. DEFAULT */);
 extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon,
                const unsigned char *searchName,
                char *acl_inf, const int buflen,const int acl_type,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6867e556d37e..217323b0c896 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -37,6 +37,7 @@
 #include "cifsproto.h"
 #include "cifs_unicode.h"
 #include "cifs_debug.h"
+#include "cifsacl.h"
 #ifdef CONFIG_CIFS_POSIX
 static struct {
@@ -372,8 +373,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
        rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
        if (rc == 0) {
-                server->secMode = pSMBr->SecurityMode;  
+                server->secMode = pSMBr->SecurityMode;
-                server->secType = NTLM; /* BB override default for 
+                if((server->secMode & SECMODE_USER) == 0)
+                        cFYI(1,("share mode security"));
+                server->secType = NTLM; /* BB override default for
                                           NTLMv2 or kerberos v5 */
                /* one byte - no need to convert this or EncryptionKeyLen
                   from little endian */
@@ -383,7 +386,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                        min(le32_to_cpu(pSMBr->MaxBufferSize),
                        (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
                server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
-                cFYI(0, ("Max buf = %d ", ses->server->maxBuf));
+                cFYI(0, ("Max buf = %d", ses->server->maxBuf));
                GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
                server->capabilities = le32_to_cpu(pSMBr->Capabilities);
                server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);  
@@ -411,8 +414,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                                                (server->server_GUID,
                                                pSMBr->u.extended_response.
                                                GUID, 16) != 0) {
-                                                cFYI(1,
+                                                cFYI(1, ("server UID changed"));
-                                                     ("UID of server does not match previous connection to same ip address"));
                                                memcpy(server->
                                                        server_GUID,
                                                        pSMBr->u.
@@ -958,21 +960,19 @@ openRetry:
        return rc;
 }
-/* If no buffer passed in, then caller wants to do the copy
-        as in the case of readpages so the SMB buffer must be
-        freed by the caller */
 int
 CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
-            const int netfid, const unsigned int count,
+            const int netfid, const unsigned int count,
-            const __u64 lseek, unsigned int *nbytes, char **buf)
+            const __u64 lseek, unsigned int *nbytes, char **buf,
+            int * pbuf_type)
 {
        int rc = -EACCES;
        READ_REQ *pSMB = NULL;
        READ_RSP *pSMBr = NULL;
        char *pReadData = NULL;
-        int bytes_returned;
        int wct;
+        int resp_buf_type = 0;
+        struct kvec iov[1];
        cFYI(1,("Reading %d bytes on fid %d",count,netfid));
        if(tcon->ses->capabilities & CAP_LARGE_FILES)
@@ -981,8 +981,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
                wct = 10; /* old style read */
        *nbytes = 0;
-        rc = smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB,
+        rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB);
-                      (void **) &pSMBr);
        if (rc)
                return rc;
@@ -990,13 +989,13 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
        if (tcon->ses->server == NULL)
                return -ECONNABORTED;
-        pSMB->AndXCommand = 0xFF;       /* none */
+        pSMB->AndXCommand = 0xFF;       /* none */
        pSMB->Fid = netfid;
        pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF);
        if(wct == 12)
                pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
-        else if((lseek >> 32) > 0) /* can not handle this big offset for old */
+        else if((lseek >> 32) > 0) /* can not handle this big offset for old */
-                return -EIO;
+                return -EIO;
        pSMB->Remaining = 0;
        pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
@@ -1005,14 +1004,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
                pSMB->ByteCount = 0;  /* no need to do le conversion since 0 */
        else {
                /* old style read */
-                struct smb_com_readx_req * pSMBW = 
+                struct smb_com_readx_req * pSMBW =
                        (struct smb_com_readx_req *)pSMB;
-                pSMBW->ByteCount = 0;   
+                pSMBW->ByteCount = 0;
        }
-        
-        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+        iov[0].iov_base = (char *)pSMB;
-                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+        rc = SendReceive2(xid, tcon->ses, iov, 
+                          1 /* num iovecs */,
+                          &resp_buf_type, 0); 
        cifs_stats_inc(&tcon->num_reads);
+        pSMBr = (READ_RSP *)iov[0].iov_base;
        if (rc) {
                cERROR(1, ("Send error in read = %d", rc));
        } else {
@@ -1022,33 +1025,43 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
                *nbytes = data_length;
                /*check that DataLength would not go beyond end of SMB */
-                if ((data_length > CIFSMaxBufSize) 
+                if ((data_length > CIFSMaxBufSize)
                                || (data_length > count)) {
                        cFYI(1,("bad length %d for count %d",data_length,count));
                        rc = -EIO;
                        *nbytes = 0;
                } else {
-                        pReadData =
+                        pReadData = (char *) (&pSMBr->hdr.Protocol) +
-                            (char *) (&pSMBr->hdr.Protocol) +
                            le16_to_cpu(pSMBr->DataOffset);
-/*                      if(rc = copy_to_user(buf, pReadData, data_length)) {
+/*                      if(rc = copy_to_user(buf, pReadData, data_length)) {
-                                cERROR(1,("Faulting on read rc = %d",rc));
+                                cERROR(1,("Faulting on read rc = %d",rc));
-                                rc = -EFAULT;
+                                rc = -EFAULT;
-                        }*/ /* can not use copy_to_user when using page cache*/
+                        }*/ /* can not use copy_to_user when using page cache*/
                        if(*buf)
-                            memcpy(*buf,pReadData,data_length);
+                                memcpy(*buf,pReadData,data_length);
                }
        }
-        if(*buf)
-                cifs_buf_release(pSMB);
-        else
-                *buf = (char *)pSMB;
-        /* Note: On -EAGAIN error only caller can retry on handle based calls 
+        cifs_small_buf_release(pSMB);
+        if(*buf) {
+                if(resp_buf_type == CIFS_SMALL_BUFFER)
+                        cifs_small_buf_release(iov[0].iov_base);
+                else if(resp_buf_type == CIFS_LARGE_BUFFER)
+                        cifs_buf_release(iov[0].iov_base);
+        } else /* return buffer to caller to free */ /* BB FIXME how do we tell caller if it is not a large buffer */ {
+                *buf = iov[0].iov_base;
+                if(resp_buf_type == CIFS_SMALL_BUFFER)
+                        *pbuf_type = CIFS_SMALL_BUFFER;
+                else if(resp_buf_type == CIFS_LARGE_BUFFER)
+                        *pbuf_type = CIFS_LARGE_BUFFER;
+        }
+        /* Note: On -EAGAIN error only caller can retry on handle based calls
                since file handle passed in no longer valid */
        return rc;
 }
 int
 CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
             const int netfid, const unsigned int count,
@@ -1155,7 +1168,6 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
        return rc;
 }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 int
 CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
             const int netfid, const unsigned int count,
@@ -1164,10 +1176,10 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 {
        int rc = -EACCES;
        WRITE_REQ *pSMB = NULL;
-        int bytes_returned, wct;
+        int wct;
        int smb_hdr_len;
+        int resp_buf_type = 0;
-        /* BB removeme BB */
        cFYI(1,("write2 at %lld %d bytes", (long long)offset, count));
        if(tcon->ses->capabilities & CAP_LARGE_FILES)
@@ -1210,22 +1222,34 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
                pSMBW->ByteCount = cpu_to_le16(count + 5);
        }
        iov[0].iov_base = pSMB;
-        iov[0].iov_len = smb_hdr_len + 4;
+        if(wct == 14)
+                iov[0].iov_len = smb_hdr_len + 4;
+        else /* wct == 12 pad bigger by four bytes */
+                iov[0].iov_len = smb_hdr_len + 8;
+        
-        rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &bytes_returned,
+        rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
                          long_op);
        cifs_stats_inc(&tcon->num_writes);
        if (rc) {
                cFYI(1, ("Send error Write2 = %d", rc));
                *nbytes = 0;
+        } else if(resp_buf_type == 0) {
+                /* presumably this can not happen, but best to be safe */
+                rc = -EIO;
+                *nbytes = 0;
        } else {
-                WRITE_RSP * pSMBr = (WRITE_RSP *)pSMB;
+                WRITE_RSP * pSMBr = (WRITE_RSP *)iov[0].iov_base;
                *nbytes = le16_to_cpu(pSMBr->CountHigh);
                *nbytes = (*nbytes) << 16;
                *nbytes += le16_to_cpu(pSMBr->Count);
-        }
+        } 
        cifs_small_buf_release(pSMB);
+        if(resp_buf_type == CIFS_SMALL_BUFFER)
+                cifs_small_buf_release(iov[0].iov_base);
+        else if(resp_buf_type == CIFS_LARGE_BUFFER)
+                cifs_buf_release(iov[0].iov_base);
        /* Note: On -EAGAIN error only caller can retry on handle based calls 
                since file handle passed in no longer valid */
@@ -1234,8 +1258,6 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 }
-#endif /* CIFS_EXPERIMENTAL */
 int
 CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
            const __u16 smb_file_id, const __u64 len,
@@ -1906,6 +1928,90 @@ querySymLinkRetry:
        return rc;
 }
+/* Initialize an NT TRANSACT SMB in a small smb request buffer.
+   This assumes that all NT TRANSACTS that we init here have
+   total parm and data under about 400 bytes (to fit in small cifs
+   buffer size), which is the case so far, it easily fits.
+
+   sub_command: NT TRANSACT subcommand, stored little endian in SubCommand
+   setup_count: number of setup words; added to the base word count of 19
+                and used (two bytes per word) when computing the offsets
+   parm_len:    parameter length in bytes, stored as total and current
+                parameter count
+   tcon:        tree connection, passed to small_smb_init and used to
+                derive MaxDataCount from the server's maxBuf
+   ret_buf:     on success receives the allocated request buffer
+
+   Returns 0 on success or the error returned by small_smb_init (in which
+   case *ret_buf is not written).
+
+   NB: the following are NOT filled in here and must be set by the caller:
+        Setup words themselves and ByteCount
+        MaxSetupCount (size of returned setup area) and
+        MaxParameterCount (returned parms size) */
+static int 
+smb_init_ntransact(const __u16 sub_command, const int setup_count,
+                   const int parm_len, struct cifsTconInfo *tcon,
+                   void ** ret_buf)
+{
+        int rc;
+        __u32 temp_offset;
+        struct smb_com_ntransact_req * pSMB;
+        rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
+                                (void **)&pSMB);
+        if (rc)
+                return rc;
+        *ret_buf = (void *)pSMB;
+        pSMB->Reserved = 0;
+        pSMB->TotalParameterCount = cpu_to_le32(parm_len);
+        pSMB->TotalDataCount  = 0;
+        /* largest data area we accept back: server buffer size less the
+           header, with the low byte masked off (multiple of 256) */
+        pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+                                          MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+        /* everything goes in this one request, so the per-request counts
+           equal the totals (both values are already little endian) */
+        pSMB->ParameterCount = pSMB->TotalParameterCount;
+        pSMB->DataCount  = pSMB->TotalDataCount;
+        /* parameters begin at Parms plus the setup words (2 bytes each) */
+        temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
+                        (setup_count * 2) - 4 /* for rfc1001 length itself */;
+        pSMB->ParameterOffset = cpu_to_le32(temp_offset);
+        /* the data area is placed directly after the parameters */
+        pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
+        pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
+        pSMB->SubCommand = cpu_to_le16(sub_command);
+        return 0;
+}
+/* Sanity check the parameter and data areas of an NT TRANSACT response.
+
+   buf:      start of the response (cast to struct smb_com_ntransact_rsp)
+   ppparm:   receives a pointer to the start of the parameter area
+   ppdata:   receives a pointer to the start of the data area
+   pdatalen: on success receives the DataCount reported by the server
+   pparmlen: on success receives the ParameterCount reported by the server
+
+   Returns 0 if both areas lie within the SMB (as bounded by ByteCount),
+   -EINVAL if buf is NULL or either area starts or ends past the end of
+   the SMB.  On failure *pdatalen and *pparmlen are left untouched, while
+   *ppparm and *ppdata may already have been written and must not be
+   used by the caller. */
+static int
+validate_ntransact(char * buf, char ** ppparm, char ** ppdata,
+                   int * pdatalen, int * pparmlen)
+{
+        char * end_of_smb;
+        __u32 data_count, data_offset, parm_count, parm_offset;
+        struct smb_com_ntransact_rsp * pSMBr;
+        if(buf == NULL)
+                return -EINVAL;
+        pSMBr = (struct smb_com_ntransact_rsp *)buf;
+        /* ByteCount was converted from little endian in SendReceive */
+        /* NOTE(review): the caller in this file uses SendReceive2 -
+           confirm that it performs the same conversion */
+        end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + 
+                        (char *)&pSMBr->ByteCount;
+                
+        data_offset = le32_to_cpu(pSMBr->DataOffset);
+        data_count = le32_to_cpu(pSMBr->DataCount);
+        parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
+        parm_count = le32_to_cpu(pSMBr->ParameterCount);
+        /* offsets in the response are relative to the Protocol field */
+        *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
+        *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
+        /* should we also check that parm and data areas do not overlap? */
+        if(*ppparm > end_of_smb) {
+                cFYI(1,("parms start after end of smb"));
+                return -EINVAL;
+        } else if(parm_count + *ppparm > end_of_smb) {
+                cFYI(1,("parm end after end of smb"));
+                return -EINVAL;
+        } else if(*ppdata > end_of_smb) {
+                cFYI(1,("data starts after end of smb"));
+                return -EINVAL;
+        } else if(data_count + *ppdata > end_of_smb) {
+                cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p",
+                        *ppdata, data_count, (data_count + *ppdata), end_of_smb, pSMBr));  /* BB FIXME */
+                return -EINVAL;
+        } else if(parm_count + data_count > pSMBr->ByteCount) {
+                cFYI(1,("parm count and data count larger than SMB"));
+                return -EINVAL;
+        }
+        /* hand the validated lengths back; previously these output
+           parameters were never written, leaving the caller's variables
+           uninitialized */
+        *pdatalen = data_count;
+        *pparmlen = parm_count;
+        return 0;
+}
 int
 CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
                        const unsigned char *searchName,
@@ -1928,7 +2034,8 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
        pSMB->TotalDataCount = 0;
        pSMB->MaxParameterCount = cpu_to_le32(2);
        /* BB find exact data count max from sess structure BB */
-        pSMB->MaxDataCount = cpu_to_le32(4000);
+        pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+                                          MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
        pSMB->MaxSetupCount = 4;
        pSMB->Reserved = 0;
        pSMB->ParameterOffset = 0;
@@ -1955,7 +2062,9 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
                        rc = -EIO;      /* bad smb */
                else {
                        if(data_count && (data_count < 2048)) {
-                                char * end_of_smb = pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
+                                char * end_of_smb = 2 /* sizeof byte count */ +
+                                                pSMBr->ByteCount +
+                                                (char *)&pSMBr->ByteCount;
                                struct reparse_data * reparse_buf = (struct reparse_data *)
                                        ((char *)&pSMBr->hdr.Protocol + data_offset);
@@ -2199,6 +2308,7 @@ queryAclRetry:
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+        cifs_stats_inc(&tcon->num_acl_get);
        if (rc) {
                cFYI(1, ("Send error in Query POSIX ACL = %d", rc));
        } else {
@@ -2386,6 +2496,92 @@ GetExtAttrOut:
 #endif /* CONFIG_POSIX */
+/* security id for everyone */
+/* NOTE(review): the well-known "Everyone" SID is S-1-1-0 (identifier
+   authority 1); the six authority bytes below are all zero - confirm
+   against the struct cifs_sid field layout */
+const struct cifs_sid sid_everyone = {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
+/* group users */
+/* NOTE(review): presumably the builtin Users group, S-1-5-32-545, which
+   has two sub-authorities (32 and 545) - verify that the second
+   initializer is the sub-authority count and what the trailing zero
+   entries mean */
+const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
+/* Convert CIFS ACL to POSIX form.
+   Placeholder only: no conversion is implemented yet, both arguments
+   are ignored and 0 is always returned. */
+static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len)
+{
+        return 0;
+}
+/* Get Security Descriptor (by handle) from remote server for a file or dir.
+
+   Builds an NT_TRANSACT_QUERY_SECURITY_DESC request for file handle fid
+   asking for the owner, group and DACL, sends it with SendReceive2 and
+   validates the layout of the response before handing the returned
+   descriptor to parse_sec_desc.
+
+   acl_inf, buflen and acl_type are not used yet (see the BB notes).
+
+   Returns 0 on success, the error from smb_init_ntransact or
+   SendReceive2, the error from validate_ntransact if the response
+   layout is bad, or -EIO if the server did not return exactly four
+   bytes of parameters. */
+int
+CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
+         /*  BB fix up return info */ char *acl_inf, const int buflen, 
+                  const int acl_type /* ACCESS/DEFAULT not sure implication */)
+{
+        int rc = 0;
+        int buf_type = 0;
+        QUERY_SEC_DESC_REQ * pSMB;
+        struct kvec iov[1];
+        cFYI(1, ("GetCifsACL"));
+        rc = smb_init_ntransact(NT_TRANSACT_QUERY_SECURITY_DESC, 0, 
+                        8 /* parm len */, tcon, (void **) &pSMB);
+        if (rc)
+                return rc;
+        pSMB->MaxParameterCount = cpu_to_le32(4);
+        /* BB TEST with big acls that might need to be e.g. larger than 16K */
+        pSMB->MaxSetupCount = 0;
+        pSMB->Fid = fid; /* file handle always le */
+        pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
+                                     CIFS_ACL_DACL);
+        pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
+        pSMB->hdr.smb_buf_length += 11;
+        iov[0].iov_base = (char *)pSMB;
+        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+        /* on return iov[0].iov_base is treated as the response buffer and
+           buf_type says which release routine it needs */
+        rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
+        cifs_stats_inc(&tcon->num_acl_get);
+        if (rc) {
+                cFYI(1, ("Send error in QuerySecDesc = %d", rc));
+        } else {                /* decode response */
+                struct cifs_sid * psec_desc;
+                __le32 * parm;
+                int parm_len = 0;
+                int data_len = 0;
+                int acl_len;
+                struct smb_com_ntransact_rsp * pSMBr;
+/* validate_nttransact */
+                /* validate_ntransact takes the data length pointer before
+                   the parm length pointer; they were passed swapped */
+                rc = validate_ntransact(iov[0].iov_base, (char **)&parm, 
+                                        (char **)&psec_desc,
+                                        &data_len, &parm_len);
+                
+                if(rc)
+                        goto qsec_out;
+                pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
+                /* debug only: was logged at error level on every call */
+                cFYI(1,("smb %p parm %p data %p",pSMBr,parm,psec_desc));
+                if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
+                        rc = -EIO;      /* bad smb */
+                        goto qsec_out;
+                }
+/* BB check that data area is minimum length and as big as acl_len */
+                /* the four parameter bytes hold the acl length */
+                acl_len = le32_to_cpu(*(__le32 *)parm);
+                /* BB check if(acl_len > bufsize) */
+                parse_sec_desc(psec_desc, acl_len);
+        }
+qsec_out:
+        /* release the response buffer (if any) with the routine matching
+           its type, then the request buffer */
+        if(buf_type == CIFS_SMALL_BUFFER)
+                cifs_small_buf_release(iov[0].iov_base);
+        else if(buf_type == CIFS_LARGE_BUFFER)
+                cifs_buf_release(iov[0].iov_base);
+        cifs_small_buf_release(pSMB);
+        return rc;
+}
 /* Legacy Query Path Information call for lookup to old servers such
   as Win9x/WinME */
 int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
@@ -4284,7 +4480,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
 {
        int rc = 0;
        struct smb_com_transaction_change_notify_req * pSMB = NULL;
-        struct smb_com_transaction_change_notify_rsp * pSMBr = NULL;
+        struct smb_com_ntransaction_change_notify_rsp * pSMBr = NULL;
        struct dir_notify_req *dnotify_req;
        int bytes_returned;
@@ -4299,6 +4495,10 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
        pSMB->MaxParameterCount = cpu_to_le32(2);
        /* BB find exact data count max from sess structure BB */
        pSMB->MaxDataCount = 0; /* same in little endian or be */
+/* BB VERIFY verify which is correct for above BB */
+        pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+                                             MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
        pSMB->MaxSetupCount = 4;
        pSMB->Reserved = 0;
        pSMB->ParameterOffset = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c467de857610..88f60aa52058 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -76,12 +76,19 @@ struct smb_vol {
        unsigned setuids:1;
        unsigned noperm:1;
        unsigned no_psx_acl:1; /* set if posix acl support should be disabled */
+        unsigned cifs_acl:1;
        unsigned no_xattr:1;   /* set if xattr (EA) support should be disabled*/
        unsigned server_ino:1; /* use inode numbers from server ie UniqueId */
        unsigned direct_io:1;
        unsigned remap:1;   /* set to remap seven reserved chars in filenames */
        unsigned posix_paths:1;   /* unset to not ask for posix pathnames. */
        unsigned sfu_emul:1;
+        unsigned krb5:1;
+        unsigned ntlm:1;
+        unsigned ntlmv2:1;
+        unsigned nullauth:1; /* attempt to authenticate with null user */
+        unsigned sign:1;
+        unsigned seal:1;     /* encrypt */
        unsigned nocase;     /* request case insensitive filenames */
        unsigned nobrl;      /* disable sending byte range locks to srv */
        unsigned int rsize;
@@ -508,7 +515,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                /* else length ok */
                reconnect = 0;
-                if(pdu_length > MAX_CIFS_HDR_SIZE - 4) {
+                if(pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) {
                        isLargeBuf = TRUE;
                        memcpy(bigbuf, smallbuf, 4);
                        smb_buffer = bigbuf;
@@ -777,7 +784,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
        /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
        vol->rw = TRUE;
+        vol->ntlm = TRUE;
        /* default is always to request posix paths. */
        vol->posix_paths = 1;
@@ -903,6 +910,39 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
                                printk(KERN_WARNING "CIFS: ip address too long\n");
                                return 1;
                        }
+                } else if (strnicmp(data, "sec", 3) == 0) { 
+                        if (!value || !*value) {
+                                cERROR(1,("no security value specified"));
+                                continue;
+                        } else if (strnicmp(value, "krb5i", 5) == 0) {
+                                vol->sign = 1;
+                                vol->krb5 = 1;
+                        } else if (strnicmp(value, "krb5p", 5) == 0) {
+                                /* vol->seal = 1; 
+                                   vol->krb5 = 1; */
+                                cERROR(1,("Krb5 cifs privacy not supported"));
+                                return 1;
+                        } else if (strnicmp(value, "krb5", 4) == 0) {
+                                vol->krb5 = 1;
+                        } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
+                                vol->ntlmv2 = 1;
+                                vol->sign = 1;
+                        } else if (strnicmp(value, "ntlmv2", 6) == 0) {
+                                vol->ntlmv2 = 1;
+                        } else if (strnicmp(value, "ntlmi", 5) == 0) {
+                                vol->ntlm = 1;
+                                vol->sign = 1;
+                        } else if (strnicmp(value, "ntlm", 4) == 0) {
+                                /* ntlm is default so can be turned off too */
+                                vol->ntlm = 1;
+                        } else if (strnicmp(value, "nontlm", 6) == 0) {
+                                vol->ntlm = 0;
+                        } else if (strnicmp(value, "none", 4) == 0) {
+                                vol->nullauth = 1; 
+                        } else {
+                                cERROR(1,("bad security option: %s", value));
+                                return 1;
+                        }
                } else if ((strnicmp(data, "unc", 3) == 0)
                           || (strnicmp(data, "target", 6) == 0)
                           || (strnicmp(data, "path", 4) == 0)) {
@@ -1120,6 +1160,10 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
                        vol->server_ino = 1;
                } else if (strnicmp(data, "noserverino",9) == 0) {
                        vol->server_ino = 0;
+                } else if (strnicmp(data, "cifsacl",7) == 0) {
+                        vol->cifs_acl = 1;
+                } else if (strnicmp(data, "nocifsacl", 9) == 0) {
+                        vol->cifs_acl = 0;
                } else if (strnicmp(data, "acl",3) == 0) {
                        vol->no_psx_acl = 0;
                } else if (strnicmp(data, "noacl",5) == 0) {
@@ -1546,7 +1590,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                cFYI(1, ("Username: %s ", volume_info.username));
        } else {
-                cifserror("No username specified ");
+                cifserror("No username specified");
        /* In userspace mount helper we can get user name from alternate
           locations such as env variables and files on disk */
                kfree(volume_info.UNC);
@@ -1587,7 +1631,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                return -EINVAL;
        } else /* which servers DFS root would we conect to */ {
                cERROR(1,
-                       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified  "));
+                       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
                kfree(volume_info.UNC);
                kfree(volume_info.password);
                FreeXid(xid);
@@ -1626,7 +1670,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        if (srvTcp) {
-                cFYI(1, ("Existing tcp session with server found "));                
+                cFYI(1, ("Existing tcp session with server found"));                
        } else {        /* create socket */
                if(volume_info.port)
                        sin_server.sin_port = htons(volume_info.port);
@@ -1689,11 +1733,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        if (existingCifsSes) {
                pSesInfo = existingCifsSes;
-                cFYI(1, ("Existing smb sess found "));
+                cFYI(1, ("Existing smb sess found"));
                kfree(volume_info.password);
                /* volume_info.UNC freed at end of function */
        } else if (!rc) {
-                cFYI(1, ("Existing smb sess not found "));
+                cFYI(1, ("Existing smb sess not found"));
                pSesInfo = sesInfoAlloc();
                if (pSesInfo == NULL)
                        rc = -ENOMEM;
@@ -1751,7 +1795,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                cifs_sb->mnt_gid = volume_info.linux_gid;
                cifs_sb->mnt_file_mode = volume_info.file_mode;
                cifs_sb->mnt_dir_mode = volume_info.dir_mode;
-                cFYI(1,("file mode: 0x%x  dir mode: 0x%x",cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode));
+                cFYI(1,("file mode: 0x%x  dir mode: 0x%x",
+                        cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode));
                if(volume_info.noperm)
                        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -1767,6 +1812,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
                if(volume_info.nobrl)
                        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+                if(volume_info.cifs_acl)
+                        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
                if(volume_info.direct_io) {
                        cFYI(1,("mounting share using direct i/o"));
@@ -1777,7 +1824,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                    find_unc(sin_server.sin_addr.s_addr, volume_info.UNC,
                             volume_info.username);
                if (tcon) {
-                        cFYI(1, ("Found match on UNC path "));
+                        cFYI(1, ("Found match on UNC path"));
                        /* we can have only one retry value for a connection
                           to a share so for resources mounted more than once
                           to the same server share the last value passed in 
@@ -1926,7 +1973,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
        __u32 capabilities;
        __u16 count;
-        cFYI(1, ("In sesssetup "));
+        cFYI(1, ("In sesssetup"));
        if(ses == NULL)
                return -EINVAL;
        user = ses->userName;
@@ -3202,9 +3249,26 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
        pSMB->AndXCommand = 0xFF;
        pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
-        pSMB->PasswordLength = cpu_to_le16(1);  /* minimum */
        bcc_ptr = &pSMB->Password[0];
-        bcc_ptr++;              /* skip password */
+        if((ses->server->secMode) & SECMODE_USER) {
+                pSMB->PasswordLength = cpu_to_le16(1);  /* minimum */
+                bcc_ptr++;              /* skip password */
+        } else {
+                pSMB->PasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE);
+                /* BB FIXME add code to fail this if NTLMv2 or Kerberos
+                   specified as required (when that support is added to
+                   the vfs in the future) as only NTLM or the much
+                   weaker LANMAN (which we do not send) is accepted
+                   by Samba (not sure whether other servers allow
+                   NTLMv2 password here) */
+                SMBNTencrypt(ses->password,
+                             ses->server->cryptKey,
+                             bcc_ptr);
+                bcc_ptr += CIFS_SESSION_KEY_SIZE;
+                *bcc_ptr = 0;
+                bcc_ptr++; /* align */
+        }
        if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
                smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
@@ -3222,7 +3286,6 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                bcc_ptr += 2 * length;  /* convert num of 16 bit words to bytes */
                bcc_ptr += 2;   /* skip trailing null */
        } else {                /* ASCII */
                strcpy(bcc_ptr, tree);
                bcc_ptr += strlen(tree) + 1;
        }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 32cc96cafa3e..fed55e3c53df 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
 *
 *   vfs operations that deal with dentries
 * 
- *   Copyright (C) International Business Machines  Corp., 2002,2003
+ *   Copyright (C) International Business Machines  Corp., 2002,2005
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
 *   This library is free software; you can redistribute it and/or modify
@@ -200,8 +200,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                        (oplock & CIFS_CREATE_ACTION))
                        if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                                CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
-                                        (__u64)current->euid,
+                                        (__u64)current->fsuid,
-                                        (__u64)current->egid,
+                                        (__u64)current->fsgid,
                                        0 /* dev */,
                                        cifs_sb->local_nls, 
                                        cifs_sb->mnt_cifs_flags & 
@@ -325,7 +325,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
        else if (pTcon->ses->capabilities & CAP_UNIX) {
                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                        rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-                                mode,(__u64)current->euid,(__u64)current->egid,
+                                mode,(__u64)current->fsuid,(__u64)current->fsgid,
                                device_number, cifs_sb->local_nls,
                                cifs_sb->mnt_cifs_flags & 
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 14a1c72ced92..77c990f0cb98 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
                if (file->f_dentry->d_inode->i_mapping) {
                /* BB no need to lock inode until after invalidate
                   since namei code should already have it locked? */
-                        filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
+                        filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
-                        filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
                }
                cFYI(1, ("invalidating remote inode since open detected it "
                         "changed"));
@@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
                pCifsInode = CIFS_I(inode);
                if (pCifsInode) {
                        if (can_flush) {
-                                filemap_fdatawrite(inode->i_mapping);
+                                filemap_write_and_wait(inode->i_mapping);
-                                filemap_fdatawait(inode->i_mapping);
                        /* temporarily disable caching while we
                           go to server to get inode info */
                                pCifsInode->clientCanCacheAll = FALSE;
@@ -555,13 +553,13 @@ int cifs_closedir(struct inode *inode, struct file *file)
                }
                ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
                if (ptmp) {
-   /* BB removeme BB */ cFYI(1, ("freeing smb buf in srch struct in closedir"));
+                        cFYI(1, ("closedir free smb buf in srch struct"));
                        pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
                        cifs_buf_release(ptmp);
                }
                ptmp = pCFileStruct->search_resume_name;
                if (ptmp) {
-   /* BB removeme BB */ cFYI(1, ("freeing resume name in closedir"));
+                        cFYI(1, ("closedir free resume name"));
                        pCFileStruct->search_resume_name = NULL;
                        kfree(ptmp);
                }
@@ -870,10 +868,9 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
                                if (rc != 0)
                                        break;
                        }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
                        /* BB FIXME We can not sign across two buffers yet */
-                        if((experimEnabled) && ((pTcon->ses->server->secMode & 
+                        if((pTcon->ses->server->secMode & 
-                         (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0)) {
+                         (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) {
                                struct kvec iov[2];
                                unsigned int len;
@@ -889,7 +886,6 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
                                                iov, 1, long_op);
                        } else
                        /* BB FIXME fixup indentation of line below */
-#endif                  
                        rc = CIFSSMBWrite(xid, pTcon,
                                 open_file->netfid,
                                 min_t(const int, cifs_sb->wsize, 
@@ -1026,7 +1022,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
        return rc;
 }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
 {
@@ -1229,7 +1224,6 @@ retry:
        return rc;
 }
-#endif
 static int cifs_writepage(struct page* page, struct writeback_control *wbc)
 {
@@ -1428,6 +1422,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
                rc = -EAGAIN;
                smb_read_data = NULL;
                while (rc == -EAGAIN) {
+                        int buf_type = CIFS_NO_BUFFER;
                        if ((open_file->invalidHandle) && 
                            (!open_file->closePend)) {
                                rc = cifs_reopen_file(file->f_dentry->d_inode,
@@ -1436,20 +1431,22 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
                                        break;
                        }
                        rc = CIFSSMBRead(xid, pTcon,
-                                        open_file->netfid,
+                                         open_file->netfid,
-                                        current_read_size, *poffset,
+                                         current_read_size, *poffset,
-                                        &bytes_read, &smb_read_data);
+                                         &bytes_read, &smb_read_data,
+                                         &buf_type);
                        pSMBr = (struct smb_com_read_rsp *)smb_read_data;
                        if (copy_to_user(current_offset, 
                                         smb_read_data + 4 /* RFC1001 hdr */
                                         + le16_to_cpu(pSMBr->DataOffset), 
                                         bytes_read)) {
                                rc = -EFAULT;
-                                FreeXid(xid);
+                        }
-                                return rc;
-            }
                        if (smb_read_data) {
-                                cifs_buf_release(smb_read_data);
+                                if(buf_type == CIFS_SMALL_BUFFER)
+                                        cifs_small_buf_release(smb_read_data);
+                                else if(buf_type == CIFS_LARGE_BUFFER)
+                                        cifs_buf_release(smb_read_data);
                                smb_read_data = NULL;
                        }
                }
@@ -1482,6 +1479,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
        int xid;
        char *current_offset;
        struct cifsFileInfo *open_file;
+        int buf_type = CIFS_NO_BUFFER;
        xid = GetXid();
        cifs_sb = CIFS_SB(file->f_dentry->d_sb);
@@ -1518,9 +1516,10 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
                                        break;
                        }
                        rc = CIFSSMBRead(xid, pTcon,
-                                        open_file->netfid,
+                                         open_file->netfid,
-                                        current_read_size, *poffset,
+                                         current_read_size, *poffset,
-                                        &bytes_read, &current_offset);
+                                         &bytes_read, &current_offset,
+                                         &buf_type);
                }
                if (rc || (bytes_read == 0)) {
                        if (total_read) {
@@ -1618,6 +1617,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct smb_com_read_rsp *pSMBr;
        struct pagevec lru_pvec;
        struct cifsFileInfo *open_file;
+        int buf_type = CIFS_NO_BUFFER;
        xid = GetXid();
        if (file->private_data == NULL) {
@@ -1674,14 +1674,17 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                        }
                        rc = CIFSSMBRead(xid, pTcon,
-                                        open_file->netfid,
+                                         open_file->netfid,
-                                        read_size, offset,
+                                         read_size, offset,
-                                        &bytes_read, &smb_read_data);
+                                         &bytes_read, &smb_read_data,
+                                         &buf_type);
                        /* BB more RC checks ? */
                        if (rc== -EAGAIN) {
                                if (smb_read_data) {
-                                        cifs_buf_release(smb_read_data);
+                                        if(buf_type == CIFS_SMALL_BUFFER)
+                                                cifs_small_buf_release(smb_read_data);
+                                        else if(buf_type == CIFS_LARGE_BUFFER)
+                                                cifs_buf_release(smb_read_data);
                                        smb_read_data = NULL;
                                }
                        }
@@ -1738,7 +1741,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                        break;
                }
                if (smb_read_data) {
-                        cifs_buf_release(smb_read_data);
+                        if(buf_type == CIFS_SMALL_BUFFER)
+                                cifs_small_buf_release(smb_read_data);
+                        else if(buf_type == CIFS_LARGE_BUFFER)
+                                cifs_buf_release(smb_read_data);
                        smb_read_data = NULL;
                }
                bytes_read = 0;
@@ -1748,7 +1754,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 /* need to free smb_read_data buf before exit */
        if (smb_read_data) {
-                cifs_buf_release(smb_read_data);
+                if(buf_type == CIFS_SMALL_BUFFER)
+                        cifs_small_buf_release(smb_read_data);
+                else if(buf_type == CIFS_LARGE_BUFFER)
+                        cifs_buf_release(smb_read_data);
                smb_read_data = NULL;
        } 
@@ -1827,10 +1836,20 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
                open_file =  find_writable_file(cifsInode);
 
        if(open_file) {
+                struct cifs_sb_info *cifs_sb;
                /* there is not actually a write pending so let
                this handle go free and allow it to
                be closable if needed */
                atomic_dec(&open_file->wrtPending);
+                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
+                if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) {
+                        /* since no page cache to corrupt on directio 
+                        we can change size safely */
+                        return 1;
+                }
                return 0;
        } else
                return 1;
@@ -1875,9 +1894,7 @@ struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
-#ifdef CONFIG_CIFS_EXPERIMENTAL
        .writepages = cifs_writepages,
-#endif
        .prepare_write = cifs_prepare_write,
        .commit_write = cifs_commit_write,
        .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 411c1f7f84da..59359911f481 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -229,11 +229,12 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
                         cifs_sb->mnt_cifs_flags &
                                CIFS_MOUNT_MAP_SPECIAL_CHR);
        if (rc==0) {
+                int buf_type = CIFS_NO_BUFFER;
                        /* Read header */
                rc = CIFSSMBRead(xid, pTcon,
                                 netfid,
                                 24 /* length */, 0 /* offset */,
-                                 &bytes_read, &pbuf);
+                                 &bytes_read, &pbuf, &buf_type);
                if((rc == 0) && (bytes_read >= 8)) {
                        if(memcmp("IntxBLK", pbuf, 8) == 0) {
                                cFYI(1,("Block device"));
@@ -267,7 +268,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
                } else {
                        inode->i_mode |= S_IFREG; /* then it is a file */
                        rc = -EOPNOTSUPP; /* or some unknown SFU type */        
-                }
+                }               
                CIFSSMBClose(xid, pTcon, netfid);
        }
        return rc;
@@ -750,8 +751,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                                CIFSSMBUnixSetPerms(xid, pTcon, full_path,
                                                    mode,
-                                                    (__u64)current->euid,
+                                                    (__u64)current->fsuid,
-                                                    (__u64)current->egid,
+                                                    (__u64)current->fsgid,
                                                    0 /* dev_t */,
                                                    cifs_sb->local_nls,
                                                    cifs_sb->mnt_cifs_flags &
@@ -1040,9 +1041,9 @@ int cifs_revalidate(struct dentry *direntry)
        }
        /* can not grab this sem since kernel filesys locking documentation
-           indicates i_sem may be taken by the kernel on lookup and rename
+           indicates i_mutex may be taken by the kernel on lookup and rename
-           which could deadlock if we grab the i_sem here as well */
+           which could deadlock if we grab the i_mutex here as well */
-/*      down(&direntry->d_inode->i_sem);*/
+/*      mutex_lock(&direntry->d_inode->i_mutex);*/
        /* need to write out dirty pages here  */
        if (direntry->d_inode->i_mapping) {
                /* do we need to lock inode until after invalidate completes
@@ -1066,7 +1067,7 @@ int cifs_revalidate(struct dentry *direntry)
                        }
                }
        }
-/*      up(&direntry->d_inode->i_sem); */
+/*      mutex_unlock(&direntry->d_inode->i_mutex); */
        
        kfree(full_path);
        FreeXid(xid);
@@ -1148,8 +1149,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
        /* BB check if we need to refresh inode from server now ? BB */
        /* need to flush data before changing file size on server */
-        filemap_fdatawrite(direntry->d_inode->i_mapping);
+        filemap_write_and_wait(direntry->d_inode->i_mapping);
-        filemap_fdatawait(direntry->d_inode->i_mapping);
        if (attrs->ia_valid & ATTR_SIZE) {
                /* To avoid spurious oplock breaks from server, in the case of
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 94baf6c8ecbd..812c6bb0fe38 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1,7 +1,7 @@
 /*
 *   fs/cifs/misc.c
 *
- *   Copyright (C) International Business Machines  Corp., 2002,2004
+ *   Copyright (C) International Business Machines  Corp., 2002,2005
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
 *   This library is free software; you can redistribute it and/or modify
@@ -161,6 +161,9 @@ cifs_buf_get(void)
        if (ret_buf) {
                memset(ret_buf, 0, sizeof(struct smb_hdr) + 3);
                atomic_inc(&bufAllocCount);
+#ifdef CONFIG_CIFS_STATS2
+                atomic_inc(&totBufAllocCount);
+#endif /* CONFIG_CIFS_STATS2 */
        }
        return ret_buf;
@@ -195,6 +198,10 @@ cifs_small_buf_get(void)
        /* No need to clear memory here, cleared in header assemble */
        /*      memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
                atomic_inc(&smBufAllocCount);
+#ifdef CONFIG_CIFS_STATS2
+                atomic_inc(&totSmBufAllocCount);
+#endif /* CONFIG_CIFS_STATS2 */
        }
        return ret_buf;
 }
@@ -292,7 +299,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
        struct cifsSesInfo * ses;
        char *temp = (char *) buffer;
-        memset(temp,0,MAX_CIFS_HDR_SIZE);
+        memset(temp,0,256); /* bigger than MAX_CIFS_HDR_SIZE */
        buffer->smb_buf_length =
            (2 * word_count) + sizeof (struct smb_hdr) -
@@ -348,12 +355,12 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
                /*  BB Add support for establishing new tCon and SMB Session  */
                /*      with userid/password pairs found on the smb session   */ 
                /*      for other target tcp/ip addresses               BB    */
-                                if(current->uid != treeCon->ses->linux_uid) {
+                                if(current->fsuid != treeCon->ses->linux_uid) {
-                                        cFYI(1,("Multiuser mode and UID did not match tcon uid "));
+                                        cFYI(1,("Multiuser mode and UID did not match tcon uid"));
                                        read_lock(&GlobalSMBSeslock);
                                        list_for_each(temp_item, &GlobalSMBSessionList) {
                                                ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList);
-                                                if(ses->linux_uid == current->uid) {
+                                                if(ses->linux_uid == current->fsuid) {
                                                        if(ses->server == treeCon->ses->server) {
                                                                cFYI(1,("found matching uid substitute right smb_uid"));  
                                                                buffer->Uid = ses->Suid;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9bdaaecae36f..288cc048d37f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -214,8 +214,7 @@ static void fill_in_inode(struct inode *tmp_inode,
                        tmp_inode->i_fop = &cifs_file_nobrl_ops;
                else
                        tmp_inode->i_fop = &cifs_file_ops;
-                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-                        tmp_inode->i_fop->lock = NULL;
                tmp_inode->i_data.a_ops = &cifs_addr_ops;
                if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
                   (cifs_sb->tcon->ses->server->maxBuf <
@@ -327,12 +326,18 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
        if (S_ISREG(tmp_inode->i_mode)) {
                cFYI(1, ("File inode"));
                tmp_inode->i_op = &cifs_file_inode_ops;
-                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
-                        tmp_inode->i_fop = &cifs_file_direct_ops;
+                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
+                        if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+                                tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
+                        else
+                                tmp_inode->i_fop = &cifs_file_direct_ops;
+                
+                } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+                        tmp_inode->i_fop = &cifs_file_nobrl_ops;
                else
                        tmp_inode->i_fop = &cifs_file_ops;
-                if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-                        tmp_inode->i_fop->lock = NULL;
                tmp_inode->i_data.a_ops = &cifs_addr_ops;
                if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
                   (cifs_sb->tcon->ses->server->maxBuf < 
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index 9222033cad8e..aede606132aa 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -24,11 +24,11 @@
 /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
        /* RFC 1002 session packet types */
-#define RFC1002_SESSION_MESASAGE 0x00
+#define RFC1002_SESSION_MESSAGE 0x00
 #define RFC1002_SESSION_REQUEST  0x81
 #define RFC1002_POSITIVE_SESSION_RESPONSE 0x82
 #define RFC1002_NEGATIVE_SESSION_RESPONSE 0x83
-#define RFC1002_RETARGET_SESSION_RESPONSE 0x83
+#define RFC1002_RETARGET_SESSION_RESPONSE 0x84
 #define RFC1002_SESSION_KEEP_ALIVE 0x85
        /* RFC 1002 flags (only one defined */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index f8871196098c..7b98792150ea 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -206,7 +206,6 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
        return rc;
 }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 static int
 smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
          struct sockaddr *sin)
@@ -299,7 +298,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
 int
 SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 
-             struct kvec *iov, int n_vec, int *pbytes_returned,
+             struct kvec *iov, int n_vec, int * pRespBufType /* ret */, 
             const int long_op)
 {
        int rc = 0;
@@ -307,6 +306,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
        unsigned long timeout;
        struct mid_q_entry *midQ;
        struct smb_hdr *in_buf = iov[0].iov_base;
+        
+        *pRespBufType = CIFS_NO_BUFFER;  /* no response buf yet */
        if (ses == NULL) {
                cERROR(1,("Null smb session"));
@@ -392,8 +393,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                return -ENOMEM;
        }
-/* BB FIXME */
+        rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
-/*      rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); */
        midQ->midState = MID_REQUEST_SUBMITTED;
 #ifdef CONFIG_CIFS_STATS2
@@ -489,21 +489,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                        receive_len, xid));
                rc = -EIO;
        } else {                /* rcvd frame is ok */
                if (midQ->resp_buf && 
                        (midQ->midState == MID_RESPONSE_RECEIVED)) {
-                        in_buf->smb_buf_length = receive_len;
-                        /* BB verify that length would not overrun small buf */
-                        memcpy((char *)in_buf + 4,
-                               (char *)midQ->resp_buf + 4,
-                               receive_len);
-                        dump_smb(in_buf, 80);
+                        iov[0].iov_base = (char *)midQ->resp_buf;
+                        if(midQ->largeBuf)
+                                *pRespBufType = CIFS_LARGE_BUFFER;
+                        else
+                                *pRespBufType = CIFS_SMALL_BUFFER;
+                        iov[0].iov_len = receive_len + 4;
+                        iov[1].iov_len = 0;
+                        dump_smb(midQ->resp_buf, 80);
                        /* convert the length into a more usable form */
                        if((receive_len > 24) &&
                           (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
                                        SECMODE_SIGN_ENABLED))) {
-                                rc = cifs_verify_signature(in_buf,
+                                rc = cifs_verify_signature(midQ->resp_buf,
                                                ses->server->mac_signing_key,
                                                midQ->sequence_number+1);
                                if(rc) {
@@ -512,18 +514,19 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                                }
                        }
-                        *pbytes_returned = in_buf->smb_buf_length;
                        /* BB special case reconnect tid and uid here? */
                        /* BB special case Errbadpassword and pwdexpired here */
-                        rc = map_smb_to_linux_error(in_buf);
+                        rc = map_smb_to_linux_error(midQ->resp_buf);
                        /* convert ByteCount if necessary */
                        if (receive_len >=
                            sizeof (struct smb_hdr) -
                            4 /* do not count RFC1001 header */  +
-                            (2 * in_buf->WordCount) + 2 /* bcc */ )
+                            (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
-                                BCC(in_buf) = le16_to_cpu(BCC_LE(in_buf));
+                                BCC(midQ->resp_buf) = 
+                                        le16_to_cpu(BCC_LE(midQ->resp_buf));
+                        midQ->resp_buf = NULL;  /* mark it so will not be freed
+                                                by DeleteMidQEntry */
                } else {
                        rc = -EIO;
                        cFYI(1,("Bad MID state?"));
@@ -549,7 +552,6 @@ out_unlock2:
        return rc;
 }
-#endif /* CIFS_EXPERIMENTAL */
 int
 SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
@@ -790,7 +792,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                                BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
                } else {
                        rc = -EIO;
-                        cERROR(1,("Bad MID state? "));
+                        cERROR(1,("Bad MID state?"));
                }
        }
 cifs_no_response_exit:
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f375f87c7dbd..777e3363c2a4 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -254,7 +254,8 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
                rc = CIFSSMBQueryEA(xid,pTcon,full_path,ea_name,ea_value,
                        buf_size, cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-        } else if(strncmp(ea_name,POSIX_ACL_XATTR_ACCESS,strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
+        } else if(strncmp(ea_name,POSIX_ACL_XATTR_ACCESS,
+                          strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
 #ifdef CONFIG_CIFS_POSIX
                if(sb->s_flags & MS_POSIXACL)
                        rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
@@ -262,10 +263,27 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
                                cifs_sb->local_nls,
                                cifs_sb->mnt_cifs_flags & 
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
+/*              else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+                        __u16 fid;
+                        int oplock = FALSE;
+                        rc = CIFSSMBOpen(xid, pTcon, full_path,
+                                         FILE_OPEN, GENERIC_READ, 0, &fid,
+                                         &oplock, NULL, cifs_sb->local_nls,
+                                         cifs_sb->mnt_cifs_flags &
+                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
+                        if(rc == 0) {
+                                rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
+                                        ea_value, buf_size,
+                                        ACL_TYPE_ACCESS);
+                                CIFSSMBClose(xid, pTcon, fid);
+                        }
+                } */  /* BB enable after fixing up return data */
+                                
 #else 
                cFYI(1,("query POSIX ACL not supported yet"));
 #endif /* CONFIG_CIFS_POSIX */
-        } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT,strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+        } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT,
+                          strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
 #ifdef CONFIG_CIFS_POSIX
                if(sb->s_flags & MS_POSIXACL)
                        rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 80072fd9b7fa..c607d923350a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
        spin_lock(&dcache_lock);
        list_for_each(child, &parent->d_subdirs)
        {
-                de = list_entry(child, struct dentry, d_child);
+                de = list_entry(child, struct dentry, d_u.d_child);
                /* don't know what to do with negative dentries */
                if ( ! de->d_inode ) 
                        continue;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2391766e9c7c..8f1a517f8b4e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -453,7 +453,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
        coda_vfs_stat.readdir++;
        host_inode = host_file->f_dentry->d_inode;
-        down(&host_inode->i_sem);
+        mutex_lock(&host_inode->i_mutex);
        host_file->f_pos = coda_file->f_pos;
        if (!host_file->f_op->readdir) {
@@ -475,7 +475,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
        }
 out:
        coda_file->f_pos = host_file->f_pos;
-        up(&host_inode->i_sem);
+        mutex_unlock(&host_inode->i_mutex);
        return ret;
 }
diff --git a/fs/coda/file.c b/fs/coda/file.c
index e6bc022568f3..30b4630bd735 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -77,14 +77,14 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
                return -EINVAL;
        host_inode = host_file->f_dentry->d_inode;
-        down(&coda_inode->i_sem);
+        mutex_lock(&coda_inode->i_mutex);
        ret = host_file->f_op->write(host_file, buf, count, ppos);
        coda_inode->i_size = host_inode->i_size;
        coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
        coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
-        up(&coda_inode->i_sem);
+        mutex_unlock(&coda_inode->i_mutex);
        return ret;
 }
@@ -272,9 +272,9 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
        if (host_file->f_op && host_file->f_op->fsync) {
                host_dentry = host_file->f_dentry;
                host_inode = host_dentry->d_inode;
-                down(&host_inode->i_sem);
+                mutex_lock(&host_inode->i_mutex);
                err = host_file->f_op->fsync(host_file, host_dentry, datasync);
-                up(&host_inode->i_sem);
+                mutex_unlock(&host_inode->i_mutex);
        }
        if ( !err && !datasync ) {
diff --git a/fs/compat.c b/fs/compat.c
index 55ac0324aaf1..ff0bafcff720 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -53,6 +53,8 @@
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
 /*
 * Not all architectures have sys_utime, so implement this in terms
 * of sys_utimes.
@@ -68,10 +70,10 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
                tv[0].tv_usec = 0;
                tv[1].tv_usec = 0;
        }
-        return do_utimes(filename, t ? tv : NULL);
+        return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
 }
-asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
+asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t)
 {
        struct timeval tv[2];
@@ -82,14 +84,19 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _
                    get_user(tv[1].tv_usec, &t[1].tv_usec))
                        return -EFAULT; 
        } 
-        return do_utimes(filename, t ? tv : NULL);
+        return do_utimes(dfd, filename, t ? tv : NULL);
+}
+asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
+{       /* compat utimes: same as compat_sys_futimesat() with AT_FDCWD as the directory fd */
+        return compat_sys_futimesat(AT_FDCWD, filename, t);
 }
 asmlinkage long compat_sys_newstat(char __user * filename,
                struct compat_stat __user *statbuf)
 {
        struct kstat stat;
-        int error = vfs_stat(filename, &stat);
+        int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
        if (!error)
                error = cp_compat_stat(&stat, statbuf);
@@ -100,10 +107,31 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
                struct compat_stat __user *statbuf)
 {
        struct kstat stat;
-        int error = vfs_lstat(filename, &stat);
+        int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+        if (!error)
+                error = cp_compat_stat(&stat, statbuf);
+        return error;
+}
+asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename,
+                struct compat_stat __user *statbuf, int flag)
+{       /* stat filename via dfd and copy the result to statbuf in compat layout */
+        struct kstat stat;
+        int error = -EINVAL;    /* returned as-is when flag has an unsupported bit */
+        if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) /* only AT_SYMLINK_NOFOLLOW is accepted */
+                goto out;
+        if (flag & AT_SYMLINK_NOFOLLOW) /* presumably: do not follow a final symlink - confirm in vfs_lstat_fd() */
+                error = vfs_lstat_fd(dfd, filename, &stat);
+        else
+                error = vfs_stat_fd(dfd, filename, &stat);
         if (!error)
                 error = cp_compat_stat(&stat, statbuf);
+out:
         return error;
 }
@@ -494,9 +522,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
                ret = sys_fcntl(fd, cmd, (unsigned long)&f);
                set_fs(old_fs);
                if (cmd == F_GETLK && ret == 0) {
-                        if ((f.l_start >= COMPAT_OFF_T_MAX) ||
+                        /* GETLK was successful and we need to return the data...
-                            ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX))
+                         * but it needs to fit in the compat structure.
+                         * l_start shouldn't be too big, unless the original
+                         * start + end is greater than COMPAT_OFF_T_MAX, in which
+                         * case the app was asking for trouble, so we return
+                         * -EOVERFLOW in that case.
+                         * l_len could be too big, in which case we just truncate it,
+                         * and only allow the app to see that part of the conflicting
+                         * lock that might make sense to it anyway
+                         */
+                        if (f.l_start > COMPAT_OFF_T_MAX)
                                ret = -EOVERFLOW;
+                        if (f.l_len > COMPAT_OFF_T_MAX)
+                                f.l_len = COMPAT_OFF_T_MAX;
                        if (ret == 0)
                                ret = put_compat_flock(&f, compat_ptr(arg));
                }
@@ -515,9 +555,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
                                (unsigned long)&f);
                set_fs(old_fs);
                if (cmd == F_GETLK64 && ret == 0) {
-                        if ((f.l_start >= COMPAT_LOFF_T_MAX) ||
+                        /* need to return lock information - see above for commentary */
-                            ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX))
+                        if (f.l_start > COMPAT_LOFF_T_MAX)
                                ret = -EOVERFLOW;
+                        if (f.l_len > COMPAT_LOFF_T_MAX)
+                                f.l_len = COMPAT_LOFF_T_MAX;
                        if (ret == 0)
                                ret = put_compat_flock64(&f, compat_ptr(arg));
                }
@@ -1276,7 +1318,17 @@ out:
 asmlinkage long
 compat_sys_open(const char __user *filename, int flags, int mode)
 {
-        return do_sys_open(filename, flags, mode);
+        return do_sys_open(AT_FDCWD, filename, flags, mode);
+}
+/*
+ * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
+ * O_LARGEFILE flag.
+ *
+ * dfd, filename, flags and mode are handed to do_sys_open() unchanged and
+ * its return value is returned directly.  Presumably dfd is the directory
+ * fd that a relative filename is resolved against (TODO: confirm against
+ * do_sys_open()).
+ */
+asmlinkage long
+compat_sys_openat(int dfd, const char __user *filename, int flags, int mode)
+{
+        return do_sys_open(dfd, filename, flags, mode);
 }
 /*
@@ -1523,7 +1575,7 @@ out_ret:
 * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
 * 64-bit unsigned longs.
 */
-static inline
+static
 int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
                        unsigned long *fdset)
 {
@@ -1556,7 +1608,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
        return 0;
 }
-static inline
+static
 void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
                        unsigned long *fdset)
 {
@@ -1607,36 +1659,14 @@ static void select_bits_free(void *bits, int size)
 #define MAX_SELECT_SECONDS \
        ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
-asmlinkage long
+int compat_core_sys_select(int n, compat_ulong_t __user *inp,
-compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+        compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
-                compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
 {
        fd_set_bits fds;
        char *bits;
-        long timeout;
        int size, max_fdset, ret = -EINVAL;
        struct fdtable *fdt;
-        timeout = MAX_SCHEDULE_TIMEOUT;
-        if (tvp) {
-                time_t sec, usec;
-                if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-                    || __get_user(sec, &tvp->tv_sec)
-                    || __get_user(usec, &tvp->tv_usec)) {
-                        ret = -EFAULT;
-                        goto out_nofds;
-                }
-                if (sec < 0 || usec < 0)
-                        goto out_nofds;
-                if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-                        timeout = ROUND_UP(usec, 1000000/HZ);
-                        timeout += sec * (unsigned long) HZ;
-                }
-        }
        if (n < 0)
                goto out_nofds;
@@ -1673,19 +1703,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);
-        ret = do_select(n, &fds, &timeout);
+        ret = do_select(n, &fds, timeout);
-        if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-                time_t sec = 0, usec = 0;
-                if (timeout) {
-                        sec = timeout / HZ;
-                        usec = timeout % HZ;
-                        usec *= (1000000/HZ);
-                }
-                if (put_user(sec, &tvp->tv_sec) ||
-                    put_user(usec, &tvp->tv_usec))
-                        ret = -EFAULT;
-        }
        if (ret < 0)
                goto out;
@@ -1706,6 +1724,224 @@ out_nofds:
        return ret;
 }
+/*
+ * 32-bit compat entry point for select().
+ *
+ * Reads the optional compat_timeval at @tvp, converts it to a jiffies
+ * count held in an s64 (-1 is used for "infinite"), and hands the real
+ * work to compat_core_sys_select().  Afterwards, unless the task has
+ * STICKY_TIMEOUTS set, the value left in the timeout is written back
+ * to @tvp.
+ *
+ * Returns the result of compat_core_sys_select(), -EFAULT if @tvp
+ * cannot be read, or -EINVAL if either timeval field is negative.
+ */
+asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
+        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+        struct compat_timeval __user *tvp)
+{
+        s64 timeout = -1;
+        struct compat_timeval tv;
+        int ret;
+        if (tvp) {
+                if (copy_from_user(&tv, tvp, sizeof(tv)))
+                        return -EFAULT;
+                if (tv.tv_sec < 0 || tv.tv_usec < 0)
+                        return -EINVAL;
+                /* Cast to u64 to make GCC stop complaining */
+                if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
+                        timeout = -1;   /* infinite */
+                else {
+                        /* Round the microseconds up to whole jiffies. */
+                        timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
+                        timeout += tv.tv_sec * HZ;
+                }
+        }
+        ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+        if (tvp) {
+                if (current->personality & STICKY_TIMEOUTS)
+                        goto sticky;
+                /*
+                 * do_div() divides timeout in place: the quotient (whole
+                 * seconds) stays in timeout, the remainder (jiffies) is
+                 * converted to microseconds.
+                 */
+                tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+                tv.tv_sec = timeout;
+                if (copy_to_user(tvp, &tv, sizeof(tv))) {
+sticky:
+                        /*
+                         * If an application puts its timeval in read-only
+                         * memory, we don't want the Linux-specific update to
+                         * the timeval to cause a fault after the select has
+                         * completed successfully. However, because we're not
+                         * updating the timeval, we can't restart the system
+                         * call.
+                         */
+                        if (ret == -ERESTARTNOHAND)
+                                ret = -EINTR;
+                }
+        }
+        return ret;
+}
+#ifdef TIF_RESTORE_SIGMASK
+/*
+ * Worker for the 32-bit compat pselect call, taking the signal mask and
+ * its size as separate arguments.
+ *
+ * Optionally installs a temporary signal mask (SIGKILL and SIGSTOP are
+ * never blocked), then calls compat_core_sys_select() in a loop so that
+ * timeouts of MAX_SELECT_SECONDS or more are consumed in chunks.
+ * Unless STICKY_TIMEOUTS is set, the unconsumed time is written back to
+ * @tsp; a failure of that write-back is deliberately ignored.
+ *
+ * Returns the select result, -EFAULT if @tsp or @sigmask cannot be
+ * read, or -EINVAL for a negative timespec field or a @sigsetsize that
+ * differs from sizeof(compat_sigset_t).
+ */
+asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
+        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+        struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
+        compat_size_t sigsetsize)
+{
+        compat_sigset_t ss32;
+        sigset_t ksigmask, sigsaved;
+        /* s64, not long: its address is passed as the s64 * timeout below. */
+        s64 timeout = MAX_SCHEDULE_TIMEOUT;
+        struct compat_timespec ts;
+        int ret;
+        if (tsp) {
+                if (copy_from_user(&ts, tsp, sizeof(ts)))
+                        return -EFAULT;
+                if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+                        return -EINVAL;
+        }
+        if (sigmask) {
+                if (sigsetsize != sizeof(compat_sigset_t))
+                        return -EINVAL;
+                if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+                        return -EFAULT;
+                sigset_from_compat(&ksigmask, &ss32);
+                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+        }
+        do {
+                if (tsp) {
+                        if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
+                                /* Fits in one pass: use it all up. */
+                                timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+                                timeout += ts.tv_sec * (unsigned long)HZ;
+                                ts.tv_sec = 0;
+                                ts.tv_nsec = 0;
+                        } else {
+                                /* Too long: wait one chunk, keep the rest. */
+                                ts.tv_sec -= MAX_SELECT_SECONDS;
+                                timeout = MAX_SELECT_SECONDS * HZ;
+                        }
+                }
+                ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+        } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
+        if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+                /* Fold the jiffies left over back into the timespec. */
+                ts.tv_sec += timeout / HZ;
+                ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
+                if (ts.tv_nsec >= 1000000000) {
+                        ts.tv_sec++;
+                        ts.tv_nsec -= 1000000000;
+                }
+                (void)copy_to_user(tsp, &ts, sizeof(ts));
+        }
+        if (ret == -ERESTARTNOHAND) {
+                /*
+                 * Don't restore the signal mask yet. Let do_signal() deliver
+                 * the signal on the way back to userspace, before the signal
+                 * mask is restored.
+                 */
+                if (sigmask) {
+                        memcpy(&current->saved_sigmask, &sigsaved,
+                                        sizeof(sigsaved));
+                        set_thread_flag(TIF_RESTORE_SIGMASK);
+                }
+        } else if (sigmask)
+                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+        return ret;
+}
+/*
+ * 32-bit compat pselect entry point with six arguments.
+ *
+ * When @sig is non-NULL it is read as a compat_uptr_t (the user pointer
+ * to the signal mask) immediately followed by a compat_size_t (the mask
+ * size).  Both values are fetched and the call is forwarded to
+ * compat_sys_pselect7(); with a NULL @sig a NULL mask and a size of 0
+ * are forwarded instead.
+ *
+ * Returns -EFAULT if the pair at @sig cannot be read, otherwise the
+ * result of compat_sys_pselect7().
+ */
+asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
+        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+        struct compat_timespec __user *tsp, void __user *sig)
+{
+        compat_size_t sigsetsize = 0;
+        compat_uptr_t up = 0;
+        if (sig) {
+                /* One access_ok() covers both fields read with __get_user(). */
+                if (!access_ok(VERIFY_READ, sig,
+                                sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
+                        __get_user(up, (compat_uptr_t __user *)sig) ||
+                        __get_user(sigsetsize,
+                                (compat_size_t __user *)(sig+sizeof(up))))
+                        return -EFAULT;
+        }
+        return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up),
+                                        sigsetsize);
+}
+/*
+ * 32-bit compat entry point for ppoll().
+ *
+ * Converts the optional compat_timespec at @tsp to a jiffies count held
+ * in an s64 (-1 when no timeout is given), optionally installs a
+ * temporary signal mask (SIGKILL and SIGSTOP are never blocked), and
+ * calls do_sys_poll().  An -EINTR result becomes -ERESTARTNOHAND so the
+ * call can be restarted, with the saved mask restored on the way back
+ * to userspace.  Unless STICKY_TIMEOUTS is set, the value left in the
+ * timeout is written back to @tsp.
+ *
+ * Returns the poll result, -EFAULT if @tsp or @sigmask cannot be read,
+ * or -EINVAL if @sigsetsize differs from sizeof(compat_sigset_t).
+ */
+asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
+        unsigned int nfds, struct compat_timespec __user *tsp,
+        const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+        compat_sigset_t ss32;
+        sigset_t ksigmask, sigsaved;
+        struct compat_timespec ts;
+        s64 timeout = -1;
+        int ret;
+        if (tsp) {
+                if (copy_from_user(&ts, tsp, sizeof(ts)))
+                        return -EFAULT;
+                /* We assume that ts.tv_sec is always lower than
+                   the number of seconds that can be expressed in
+                   an s64. Otherwise the compiler bitches at us */
+                timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+                timeout += ts.tv_sec * HZ;
+        }
+        if (sigmask) {
+                /* Compare, don't assign: reject only a wrong mask size. */
+                if (sigsetsize != sizeof(compat_sigset_t))
+                        return -EINVAL;
+                if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+                        return -EFAULT;
+                sigset_from_compat(&ksigmask, &ss32);
+                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+        }
+        ret = do_sys_poll(ufds, nfds, &timeout);
+        /* We can restart this syscall, usually */
+        if (ret == -EINTR) {
+                /*
+                 * Don't restore the signal mask yet. Let do_signal() deliver
+                 * the signal on the way back to userspace, before the signal
+                 * mask is restored.
+                 */
+                if (sigmask) {
+                        memcpy(&current->saved_sigmask, &sigsaved,
+                                sizeof(sigsaved));
+                        set_thread_flag(TIF_RESTORE_SIGMASK);
+                }
+                ret = -ERESTARTNOHAND;
+        } else if (sigmask)
+                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+        if (tsp && timeout >= 0) {
+                if (current->personality & STICKY_TIMEOUTS)
+                        goto sticky;
+                /* Yes, we know it's actually an s64, but it's also positive. */
+                ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
+                ts.tv_sec = timeout;
+                if (copy_to_user(tsp, &ts, sizeof(ts))) {
+sticky:
+                        /*
+                         * If an application puts its timespec in read-only
+                         * memory, we don't want the Linux-specific update to
+                         * the timespec to cause a fault after the poll has
+                         * completed successfully. However, because we're not
+                         * updating the timespec, we can't restart the system
+                         * call.
+                         */
+                        if (ret == -ERESTARTNOHAND && timeout >= 0)
+                                ret = -EINTR;
+                }
+        }
+        return ret;
+}
+#endif /* TIF_RESTORE_SIGMASK */
 #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
 /* Stuff for NFS server syscalls... */
 struct compat_nfsctl_svc {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 43a2508ac696..5dd0207ffd46 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -10,11 +10,11 @@
 * ioctls.
 */
-#ifdef INCLUDES
 #include <linux/config.h>
 #include <linux/types.h>
 #include <linux/compat.h>
 #include <linux/kernel.h>
+#include <linux/capability.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
@@ -81,13 +81,9 @@
 #include <linux/capi.h>
 #include <scsi/scsi.h>
-/* Ugly hack. */
-#undef __KERNEL__
 #include <scsi/scsi_ioctl.h>
-#define __KERNEL__
 #include <scsi/sg.h>
-#include <asm/types.h>
 #include <asm/uaccess.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
@@ -95,7 +91,6 @@
 #include <linux/watchdog.h>
 #include <linux/dm-ioctl.h>
-#include <asm/module.h>
 #include <linux/soundcard.h>
 #include <linux/lp.h>
 #include <linux/ppdev.h>
@@ -127,11 +122,7 @@
 #include <linux/dvb/dmx.h>
 #include <linux/dvb/frontend.h>
 #include <linux/dvb/video.h>
+#include <linux/lp.h>
-#undef INCLUDES
-#endif
-#ifdef CODE
 /* Aiee. Someone does not find a difference between int and long */
 #define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
@@ -148,6 +139,12 @@
 #define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
 #define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
+/*
+ * Handler for ioctls whose argument is a user pointer (or is absent):
+ * converts the 32-bit @arg with compat_ptr() and forwards the call to
+ * sys_ioctl() unchanged.  @f is not used here.
+ */
+static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
+                              unsigned long arg, struct file *f)
+{
+        return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
+}
 static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
        mm_segment_t old_fs = get_fs();
@@ -207,244 +204,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
        return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
 }
-struct video_tuner32 {
-        compat_int_t tuner;
-        char name[32];
-        compat_ulong_t rangelow, rangehigh;
-        u32 flags;      /* It is really u32 in videodev.h */
-        u16 mode, signal;
-};
-static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-        int i;
-        if(get_user(kp->tuner, &up->tuner))
-                return -EFAULT;
-        for(i = 0; i < 32; i++)
-                __get_user(kp->name[i], &up->name[i]);
-        __get_user(kp->rangelow, &up->rangelow);
-        __get_user(kp->rangehigh, &up->rangehigh);
-        __get_user(kp->flags, &up->flags);
-        __get_user(kp->mode, &up->mode);
-        __get_user(kp->signal, &up->signal);
-        return 0;
-}
-static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-        int i;
-        if(put_user(kp->tuner, &up->tuner))
-                return -EFAULT;
-        for(i = 0; i < 32; i++)
-                __put_user(kp->name[i], &up->name[i]);
-        __put_user(kp->rangelow, &up->rangelow);
-        __put_user(kp->rangehigh, &up->rangehigh);
-        __put_user(kp->flags, &up->flags);
-        __put_user(kp->mode, &up->mode);
-        __put_user(kp->signal, &up->signal);
-        return 0;
-}
-struct video_buffer32 {
-        compat_caddr_t base;
-        compat_int_t height, width, depth, bytesperline;
-};
-static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-        u32 tmp;
-        if (get_user(tmp, &up->base))
-                return -EFAULT;
-        /* This is actually a physical address stored
-         * as a void pointer.
-         */
-        kp->base = (void *)(unsigned long) tmp;
-        __get_user(kp->height, &up->height);
-        __get_user(kp->width, &up->width);
-        __get_user(kp->depth, &up->depth);
-        __get_user(kp->bytesperline, &up->bytesperline);
-        return 0;
-}
-static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-        u32 tmp = (u32)((unsigned long)kp->base);
-        if(put_user(tmp, &up->base))
-                return -EFAULT;
-        __put_user(kp->height, &up->height);
-        __put_user(kp->width, &up->width);
-        __put_user(kp->depth, &up->depth);
-        __put_user(kp->bytesperline, &up->bytesperline);
-        return 0;
-}
-struct video_clip32 {
-        s32 x, y, width, height;        /* Its really s32 in videodev.h */
-        compat_caddr_t next;
-};
-struct video_window32 {
-        u32 x, y, width, height, chromakey, flags;
-        compat_caddr_t clips;
-        compat_int_t clipcount;
-};
-/* You get back everything except the clips... */
-static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
-{
-        if(put_user(kp->x, &up->x))
-                return -EFAULT;
-        __put_user(kp->y, &up->y);
-        __put_user(kp->width, &up->width);
-        __put_user(kp->height, &up->height);
-        __put_user(kp->chromakey, &up->chromakey);
-        __put_user(kp->flags, &up->flags);
-        __put_user(kp->clipcount, &up->clipcount);
-        return 0;
-}
-#define VIDIOCGTUNER32          _IOWR('v',4, struct video_tuner32)
-#define VIDIOCSTUNER32          _IOW('v',5, struct video_tuner32)
-#define VIDIOCGWIN32            _IOR('v',9, struct video_window32)
-#define VIDIOCSWIN32            _IOW('v',10, struct video_window32)
-#define VIDIOCGFBUF32           _IOR('v',11, struct video_buffer32)
-#define VIDIOCSFBUF32           _IOW('v',12, struct video_buffer32)
-#define VIDIOCGFREQ32           _IOR('v',14, u32)
-#define VIDIOCSFREQ32           _IOW('v',15, u32)
-enum {
-        MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip)
-};
-static int do_set_window(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-        struct video_window32 __user *up = compat_ptr(arg);
-        struct video_window __user *vw;
-        struct video_clip __user *p;
-        int nclips;
-        u32 n;
-        if (get_user(nclips, &up->clipcount))
-                return -EFAULT;
-        /* Peculiar interface... */
-        if (nclips < 0)
-                nclips = VIDEO_CLIPMAP_SIZE;
-        if (nclips > MaxClips)
-                return -ENOMEM;
-        vw = compat_alloc_user_space(sizeof(struct video_window) +
-                                    nclips * sizeof(struct video_clip));
-        p = nclips ? (struct video_clip __user *)(vw + 1) : NULL;
-        if (get_user(n, &up->x) || put_user(n, &vw->x) ||
-            get_user(n, &up->y) || put_user(n, &vw->y) ||
-            get_user(n, &up->width) || put_user(n, &vw->width) ||
-            get_user(n, &up->height) || put_user(n, &vw->height) ||
-            get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) ||
-            get_user(n, &up->flags) || put_user(n, &vw->flags) ||
-            get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) ||
-            get_user(n, &up->clips) || put_user(p, &vw->clips))
-                return -EFAULT;
-        if (nclips) {
-                struct video_clip32 __user *u = compat_ptr(n);
-                int i;
-                if (!u)
-                        return -EINVAL;
-                for (i = 0; i < nclips; i++, u++, p++) {
-                        s32 v;
-                        if (get_user(v, &u->x) ||
-                            put_user(v, &p->x) ||
-                            get_user(v, &u->y) ||
-                            put_user(v, &p->y) ||
-                            get_user(v, &u->width) ||
-                            put_user(v, &p->width) ||
-                            get_user(v, &u->height) ||
-                            put_user(v, &p->height) ||
-                            put_user(NULL, &p->next))
-                                return -EFAULT;
-                }
-        }
-        return sys_ioctl(fd, VIDIOCSWIN, (unsigned long)p);
-}
-static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-        union {
-                struct video_tuner vt;
-                struct video_buffer vb;
-                struct video_window vw;
-                unsigned long vx;
-        } karg;
-        mm_segment_t old_fs = get_fs();
-        void __user *up = compat_ptr(arg);
-        int err = 0;
-        /* First, convert the command. */
-        switch(cmd) {
-        case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
-        case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
-        case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
-        case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
-        case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
-        case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
-        case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
-        };
-        switch(cmd) {
-        case VIDIOCSTUNER:
-        case VIDIOCGTUNER:
-                err = get_video_tuner32(&karg.vt, up);
-                break;
-        case VIDIOCSFBUF:
-                err = get_video_buffer32(&karg.vb, up);
-                break;
-        case VIDIOCSFREQ:
-                err = get_user(karg.vx, (u32 __user *)up);
-                break;
-        };
-        if(err)
-                goto out;
-        set_fs(KERNEL_DS);
-        err = sys_ioctl(fd, cmd, (unsigned long)&karg);
-        set_fs(old_fs);
-        if(err == 0) {
-                switch(cmd) {
-                case VIDIOCGTUNER:
-                        err = put_video_tuner32(&karg.vt, up);
-                        break;
-                case VIDIOCGWIN:
-                        err = put_video_window32(&karg.vw, up);
-                        break;
-                case VIDIOCGFBUF:
-                        err = put_video_buffer32(&karg.vb, up);
-                        break;
-                case VIDIOCGFREQ:
-                        err = put_user(((u32)karg.vx), (u32 __user *)up);
-                        break;
-                };
-        }
-out:
-        return err;
-}
 struct compat_dmx_event {
        dmx_event_t     event;
        compat_time_t   timeStamp;
@@ -1158,6 +917,40 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
        return err;
 }
+/*
+ * Request-table entry as laid out for 32-bit callers: usr_ptr is a
+ * compat_uptr_t rather than a native pointer.
+ */
+struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
+        char req_state;
+        char orphan;
+        char sg_io_owned;
+        char problem;
+        int pack_id;
+        compat_uptr_t usr_ptr;
+        unsigned int duration;
+        int unused;
+};
+/*
+ * Translate SG_GET_REQUEST_TABLE for a 32-bit caller.
+ *
+ * Runs the native ioctl into a scratch array of SG_MAX_QUEUE
+ * sg_req_info_t entries obtained from compat_alloc_user_space(), then
+ * rewrites each entry into the caller's compat_sg_req_info array at
+ * @arg.
+ *
+ * Returns the native ioctl result, or -EFAULT if any user access fails.
+ */
+static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+        int err, i;
+        sg_req_info_t *r;
+        struct compat_sg_req_info *o = (struct compat_sg_req_info *)arg;
+        r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
+        err = sys_ioctl(fd,cmd,(unsigned long)r);
+        if (err < 0)
+                return err;
+        for (i = 0; i < SG_MAX_QUEUE; i++) {
+                void __user *ptr;
+                int d;
+                /*
+                 * Fields ahead of usr_ptr are copied as one chunk; usr_ptr
+                 * (narrowed to 32 bits) and duration are moved one by one.
+                 * The trailing "unused" field is not written.
+                 */
+                if (copy_in_user(o + i, r + i, offsetof(sg_req_info_t, usr_ptr)) ||
+                    get_user(ptr, &r[i].usr_ptr) ||
+                    get_user(d, &r[i].duration) ||
+                    put_user((u32)(unsigned long)(ptr), &o[i].usr_ptr) ||
+                    put_user(d, &o[i].duration))
+                        return -EFAULT;
+        }
+        return err;
+}
 struct sock_fprog32 {
        unsigned short  len;
        compat_caddr_t  filter;
@@ -2713,6 +2506,49 @@ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg
        return -EINVAL;
 }
+#define RTC_IRQP_READ32         _IOR('p', 0x0b, compat_ulong_t)
+#define RTC_IRQP_SET32          _IOW('p', 0x0c, compat_ulong_t)
+#define RTC_EPOCH_READ32        _IOR('p', 0x0d, compat_ulong_t)
+#define RTC_EPOCH_SET32         _IOW('p', 0x0e, compat_ulong_t)
+/*
+ * Translate the RTC ioctls whose argument is an unsigned long, which is
+ * 32 bits wide for compat callers.
+ *
+ * READ variants: issue the native ioctl into a kernel unsigned long
+ * (under KERNEL_DS) and store the value, narrowed to compat_ulong_t, at
+ * the user address in @arg.
+ * SET variants: fetch a 32-bit value from the user address in @arg,
+ * widen it, and pass the address of the kernel copy to the native ioctl
+ * (under KERNEL_DS).
+ *
+ * NOTE(review): the SET paths treat @arg as a user pointer and hand the
+ * native ioctl a pointer to the widened value.  Confirm that the native
+ * RTC_IRQP_SET/RTC_EPOCH_SET handlers expect a pointer rather than the
+ * value itself.
+ *
+ * Returns the native ioctl result, the get_user()/put_user() result, or
+ * -ENOIOCTLCMD for any other command.
+ */
+static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
+{
+        mm_segment_t oldfs = get_fs();
+        compat_ulong_t val32;
+        unsigned long kval;
+        int ret;
+        switch (cmd) {
+        case RTC_IRQP_READ32:
+        case RTC_EPOCH_READ32:
+                set_fs(KERNEL_DS);
+                ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
+                                        RTC_IRQP_READ : RTC_EPOCH_READ,
+                                        (unsigned long)&kval);
+                set_fs(oldfs);
+                if (ret)
+                        return ret;
+                val32 = kval;
+                return put_user(val32, (unsigned int __user *)arg);
+        case RTC_IRQP_SET32:
+        case RTC_EPOCH_SET32:
+                ret = get_user(val32, (unsigned int __user *)arg);
+                if (ret)
+                        return ret;
+                kval = val32;
+                set_fs(KERNEL_DS);
+                ret = sys_ioctl(fd, (cmd == RTC_IRQP_SET32) ?
+                                RTC_IRQP_SET : RTC_EPOCH_SET,
+                                (unsigned long)&kval);
+                set_fs(oldfs);
+                return ret;
+        default:
+                /* unreached */
+                return -ENOIOCTLCMD;
+        }
+}
 #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
 struct ncp_ioctl_request_32 {
        u32 function;
@@ -2900,10 +2736,34 @@ static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned lon
 }
 #endif
-#undef CODE
+/*
+ * Translate an lp ioctl that takes a timeval: read the compat_timeval
+ * at @arg, rebuild it as a native struct timeval in space obtained from
+ * compat_alloc_user_space(), and issue the ioctl with that copy.
+ *
+ * Returns -EFAULT if any user access fails, otherwise the sys_ioctl()
+ * result.
+ */
+static int
-#endif
+lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+        struct compat_timeval *tc = (struct compat_timeval *)arg;
+        struct timeval *tn = compat_alloc_user_space(sizeof(struct timeval));
+        struct timeval ts;
+        /* Each field is widened through the kernel-side ts on its way over. */
+        if (get_user(ts.tv_sec, &tc->tv_sec) ||
+            get_user(ts.tv_usec, &tc->tv_usec) ||
+            put_user(ts.tv_sec, &tn->tv_sec) ||
+            put_user(ts.tv_usec, &tn->tv_usec))
+                return -EFAULT;
+        return sys_ioctl(fd, cmd, (unsigned long)tn);
+}
+#define HANDLE_IOCTL(cmd,handler) \
+        { (cmd), (ioctl_trans_handler_t)(handler) },
+/* pointer to compatible structure or no argument */
+#define COMPATIBLE_IOCTL(cmd) \
+        { (cmd), do_ioctl32_pointer },
+/* argument is an unsigned long integer, not a pointer */
+#define ULONG_IOCTL(cmd) \
+        { (cmd), (ioctl_trans_handler_t)sys_ioctl },
-#ifdef DECLARES
+struct ioctl_trans ioctl_start[] = {
+#include <linux/compat_ioctl.h>
 HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
 HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
 #ifdef CONFIG_NET
@@ -2983,6 +2843,7 @@ HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
+HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
@@ -3015,14 +2876,6 @@ COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
 #ifdef CONFIG_JBD_DEBUG
 HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
 #endif
-HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSWIN32, do_set_window)
-HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl)
-HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl)
 /* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -3104,6 +2957,10 @@ HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
 HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
+HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
+HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
 #if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
 HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest)
@@ -3121,5 +2978,19 @@ HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
 HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
 HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
-#undef DECLARES
+/* parport */
-#endif
+COMPATIBLE_IOCTL(LPTIME)
+COMPATIBLE_IOCTL(LPCHAR)
+COMPATIBLE_IOCTL(LPABORTOPEN)
+COMPATIBLE_IOCTL(LPCAREFUL)
+COMPATIBLE_IOCTL(LPWAIT)
+COMPATIBLE_IOCTL(LPSETIRQ)
+COMPATIBLE_IOCTL(LPGETSTATUS)
+COMPATIBLE_IOCTL(LPGETSTATUS)
+COMPATIBLE_IOCTL(LPRESET)
+/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
+COMPATIBLE_IOCTL(LPGETFLAGS)
+HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
+};
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e48b539243a1..b668ec61527e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -288,10 +288,10 @@ static struct dentry * configfs_lookup(struct inode *dir,
 /*
 * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
- * attributes and are removed by rmdir().  We recurse, taking i_sem
+ * attributes and are removed by rmdir().  We recurse, taking i_mutex
 * on all children that are candidates for default detach.  If the
 * result is clean, then configfs_detach_group() will handle dropping
- * i_sem.  If there is an error, the caller will clean up the i_sem
+ * i_mutex.  If there is an error, the caller will clean up the i_mutex
 * holders via configfs_detach_rollback().
 */
 static int configfs_detach_prep(struct dentry *dentry)
@@ -309,8 +309,8 @@ static int configfs_detach_prep(struct dentry *dentry)
                if (sd->s_type & CONFIGFS_NOT_PINNED)
                        continue;
                if (sd->s_type & CONFIGFS_USET_DEFAULT) {
-                        down(&sd->s_dentry->d_inode->i_sem);
+                        mutex_lock(&sd->s_dentry->d_inode->i_mutex);
-                        /* Mark that we've taken i_sem */
+                        /* Mark that we've taken i_mutex */
                        sd->s_type |= CONFIGFS_USET_DROPPING;
                        ret = configfs_detach_prep(sd->s_dentry);
@@ -327,7 +327,7 @@ out:
 }
 /*
- * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is
+ * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is
 * set.
 */
 static void configfs_detach_rollback(struct dentry *dentry)
@@ -341,7 +341,7 @@ static void configfs_detach_rollback(struct dentry *dentry)
                        if (sd->s_type & CONFIGFS_USET_DROPPING) {
                                sd->s_type &= ~CONFIGFS_USET_DROPPING;
-                                up(&sd->s_dentry->d_inode->i_sem);
+                                mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
                        }
                }
        }
@@ -424,11 +424,11 @@ static void detach_groups(struct config_group *group)
                /*
                 * From rmdir/unregister, a configfs_detach_prep() pass
-                 * has taken our i_sem for us.  Drop it.
+                 * has taken our i_mutex for us.  Drop it.
                 * From mkdir/register cleanup, there is no sem held.
                 */
                if (sd->s_type & CONFIGFS_USET_DROPPING)
-                        up(&child->d_inode->i_sem);
+                        mutex_unlock(&child->d_inode->i_mutex);
                d_delete(child);
                dput(child);
@@ -493,11 +493,11 @@ static int populate_groups(struct config_group *group)
                /* FYI, we're faking mkdir here
                 * I'm not sure we need this semaphore, as we're called
                 * from our parent's mkdir.  That holds our parent's
-                 * i_sem, so afaik lookup cannot continue through our
+                 * i_mutex, so afaik lookup cannot continue through our
                 * parent to find us, let alone mess with our tree.
-                 * That said, taking our i_sem is closer to mkdir
+                 * That said, taking our i_mutex is closer to mkdir
                 * emulation, and shouldn't hurt. */
-                down(&dentry->d_inode->i_sem);
+                mutex_lock(&dentry->d_inode->i_mutex);
                for (i = 0; group->default_groups[i]; i++) {
                        new_group = group->default_groups[i];
@@ -507,7 +507,7 @@ static int populate_groups(struct config_group *group)
                                break;
                }
-                up(&dentry->d_inode->i_sem);
+                mutex_unlock(&dentry->d_inode->i_mutex);
        }
        if (ret)
@@ -856,7 +856,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
        down_write(&configfs_rename_sem);
        parent = item->parent->dentry;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
@@ -872,7 +872,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        up_write(&configfs_rename_sem);
        return error;
@@ -884,9 +884,9 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_dentry;
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        file->private_data = configfs_new_dirent(parent_sd, NULL);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return file->private_data ? 0 : -ENOMEM;
@@ -897,9 +897,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_dentry;
        struct configfs_dirent * cursor = file->private_data;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        list_del_init(&cursor->s_sibling);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        release_configfs_dirent(cursor);
@@ -975,7 +975,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
        struct dentry * dentry = file->f_dentry;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        switch (origin) {
                case 1:
                        offset += file->f_pos;
@@ -983,7 +983,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
                        if (offset >= 0)
                                break;
                default:
-                        up(&file->f_dentry->d_inode->i_sem);
+                        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -1007,7 +1007,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
                        list_add_tail(&cursor->s_sibling, p);
                }
        }
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return offset;
 }
@@ -1037,7 +1037,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        sd = configfs_sb->s_root->d_fsdata;
        link_group(to_config_group(sd->s_element), group);
-        down(&configfs_sb->s_root->d_inode->i_sem);
+        mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
        name.name = group->cg_item.ci_name;
        name.len = strlen(name.name);
@@ -1057,7 +1057,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        else
                d_delete(dentry);
-        up(&configfs_sb->s_root->d_inode->i_sem);
+        mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
        if (dentry) {
            dput(dentry);
@@ -1079,18 +1079,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                return;
        }
-        down(&configfs_sb->s_root->d_inode->i_sem);
+        mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        if (configfs_detach_prep(dentry)) {
                printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
        }
        configfs_detach_group(&group->cg_item);
        dentry->d_inode->i_flags |= S_DEAD;
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        d_delete(dentry);
-        up(&configfs_sb->s_root->d_inode->i_sem);
+        mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
        dput(dentry);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index af1ffc9a15c0..c26cd61f13af 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -336,9 +336,9 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att
        umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return error;
 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 6b274c6d428f..6577c588de9d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -122,7 +122,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
 /*
 * Unhashes the dentry corresponding to given configfs_dirent
- * Called with parent inode's i_sem held.
+ * Called with parent inode's i_mutex held.
 */
 void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 {
@@ -145,7 +145,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
        struct configfs_dirent * sd;
        struct configfs_dirent * parent_sd = dir->d_fsdata;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -156,7 +156,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 17e439138681..86bdb93789c6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = {
 static void d_callback(struct rcu_head *head)
 {
-        struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+        struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry)
 {
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
-        call_rcu(&dentry->d_rcu, d_callback);
+        call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 /*
@@ -94,7 +94,7 @@ static void d_free(struct dentry *dentry)
 * d_iput() operation if defined.
 * Called with dcache_lock and per dentry lock held, drops both.
 */
-static inline void dentry_iput(struct dentry * dentry)
+static void dentry_iput(struct dentry * dentry)
 {
        struct inode *inode = dentry->d_inode;
        if (inode) {
@@ -193,7 +193,7 @@ kill_it: {
                        list_del(&dentry->d_lru);
                        dentry_stat.nr_unused--;
                }
-                list_del(&dentry->d_child);
+                list_del(&dentry->d_u.d_child);
                dentry_stat.nr_dentry--;        /* For d_free, below */
                /*drops the locks, at that point nobody can reach this dentry */
                dentry_iput(dentry);
@@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
        struct dentry * parent;
        __d_drop(dentry);
-        list_del(&dentry->d_child);
+        list_del(&dentry->d_u.d_child);
        dentry_stat.nr_dentry--;        /* For d_free, below */
        dentry_iput(dentry);
        parent = dentry->d_parent;
@@ -518,7 +518,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                /* Have we found a mount point ? */
                if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@ resume:
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
-                next = this_parent->d_child.next; 
+                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -569,7 +569,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
-                next = this_parent->d_child.next; 
+                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
 #ifdef DCACHE_DEBUG
 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
                dentry->d_parent = dget(parent);
                dentry->d_sb = parent->d_sb;
        } else {
-                INIT_LIST_HEAD(&dentry->d_child);
+                INIT_LIST_HEAD(&dentry->d_u.d_child);
        }
        spin_lock(&dcache_lock);
        if (parent)
-                list_add(&dentry->d_child, &parent->d_subdirs);
+                list_add(&dentry->d_u.d_child, &parent->d_subdirs);
        dentry_stat.nr_dentry++;
        spin_unlock(&dcache_lock);
@@ -808,10 +808,14 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
- * aliased dentry instead.
+ * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
+ *
+ * This also assumes that the inode count has been incremented
+ * (or otherwise set) by the caller to indicate that it is now
+ * in use by the dcache.
 */
 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
@@ -838,6 +842,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
                dget_locked(alias);
                spin_unlock(&dcache_lock);
                BUG_ON(!d_unhashed(alias));
+                iput(inode);
                return alias;
        }
        list_add(&entry->d_alias, &inode->i_dentry);
@@ -1310,8 +1315,8 @@ already_unhashed:
        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);
-        list_del(&dentry->d_child);
+        list_del(&dentry->d_u.d_child);
-        list_del(&target->d_child);
+        list_del(&target->d_u.d_child);
        /* Switch the names.. */
        switch_names(dentry, target);
@@ -1322,15 +1327,15 @@ already_unhashed:
        if (IS_ROOT(dentry)) {
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
-                INIT_LIST_HEAD(&target->d_child);
+                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
                do_switch(dentry->d_parent, target->d_parent);
                /* And add them back to the (new) parent lists */
-                list_add(&target->d_child, &target->d_parent->d_subdirs);
+                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
        }
-        list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
        spin_unlock(&target->d_lock);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
@@ -1568,7 +1573,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                if (d_unhashed(dentry)||!dentry->d_inode)
                        continue;
@@ -1579,7 +1584,7 @@ resume:
                atomic_dec(&dentry->d_count);
        }
        if (this_parent != root) {
-                next = this_parent->d_child.next; 
+                next = this_parent->d_u.d_child.next;
                atomic_dec(&this_parent->d_count);
                this_parent = this_parent->d_parent;
                goto resume;
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 02aa0ddc582a..f8274a8f83bd 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/mount.h>
+#include <linux/capability.h>
 #include <linux/dcache.h>
 #include <linux/mm.h>
 #include <linux/errno.h>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a86ac4aeaedb..d4f1a2cddd47 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -146,7 +146,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
        }
        *dentry = NULL;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry)) {
                if ((mode & S_IFMT) == S_IFDIR)
@@ -155,7 +155,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
                        error = debugfs_create(parent->d_inode, *dentry, mode);
        } else
                error = PTR_ERR(dentry);
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        return error;
 }
@@ -273,7 +273,7 @@ void debugfs_remove(struct dentry *dentry)
        if (!parent || !parent->d_inode)
                return;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        if (debugfs_positive(dentry)) {
                if (dentry->d_inode) {
                        if (S_ISDIR(dentry->d_inode->i_mode))
@@ -283,7 +283,7 @@ void debugfs_remove(struct dentry *dentry)
                dput(dentry);
                }
        }
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 1274422a5384..b621521e09d4 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2162,27 +2162,27 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
         *
         * make sure that
         *   d_instantiate always runs under lock
-         *   we release i_sem lock before going to sleep
+         *   we release i_mutex lock before going to sleep
         *
         * unfortunately sometimes d_revalidate is called with
-         * and sometimes without i_sem lock held. The following checks
+         * and sometimes without i_mutex lock held. The following checks
         * attempt to deduce when we need to add (and drop resp.) lock
         * here. This relies on current (2.6.2) calling coventions:
         *
-         *   lookup_hash is always run under i_sem and is passing NULL
+         *   lookup_hash is always run under i_mutex and is passing NULL
         *   as nd
         *
-         *   open(...,O_CREATE,...) calls _lookup_hash under i_sem
+         *   open(...,O_CREATE,...) calls _lookup_hash under i_mutex
         *   and sets flags to LOOKUP_OPEN|LOOKUP_CREATE
         *
         *   all other invocations of ->d_revalidate seem to happen
-         *   outside of i_sem
+         *   outside of i_mutex
         */
        need_lock = nd &&
            (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT));
        if (need_lock)
-                down(&dir->i_sem);
+                mutex_lock(&dir->i_mutex);
        if (is_devfsd_or_child(fs_info)) {
                devfs_handle_t de = lookup_info->de;
@@ -2221,9 +2221,9 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
                add_wait_queue(&lookup_info->wait_queue, &wait);
                read_unlock(&parent->u.dir.lock);
                /* at this point it is always (hopefully) locked */
-                up(&dir->i_sem);
+                mutex_unlock(&dir->i_mutex);
                schedule();
-                down(&dir->i_sem);
+                mutex_lock(&dir->i_mutex);
                /*
                 * This does not need nor should remove wait from wait_queue.
                 * Wait queue head is never reused - nothing is ever added to it
@@ -2238,7 +2238,7 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
      out:
        if (need_lock)
-                up(&dir->i_sem);
+                mutex_unlock(&dir->i_mutex);
        return 1;
 }                               /*  End Function devfs_d_revalidate_wait  */
@@ -2284,9 +2284,9 @@ static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry,
        /*  Unlock directory semaphore, which will release any waiters. They
           will get the hashed dentry, and may be forced to wait for
           revalidation  */
-        up(&dir->i_sem);
+        mutex_unlock(&dir->i_mutex);
        wait_for_devfsd_finished(fs_info);      /*  If I'm not devfsd, must wait  */
-        down(&dir->i_sem);      /*  Grab it again because them's the rules  */
+        mutex_lock(&dir->i_mutex);      /*  Grab it again because them's the rules  */
        de = lookup_info.de;
        /*  If someone else has been so kind as to make the inode, we go home
           early  */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f2be44d4491f..bfb8a230bac9 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,7 +130,7 @@ static struct dentry *get_node(int num)
 {
        char s[12];
        struct dentry *root = devpts_root;
-        down(&root->d_inode->i_sem);
+        mutex_lock(&root->d_inode->i_mutex);
        return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
@@ -161,7 +161,7 @@ int devpts_pty_new(struct tty_struct *tty)
        if (!IS_ERR(dentry) && !dentry->d_inode)
                d_instantiate(dentry, inode);
-        up(&devpts_root->d_inode->i_sem);
+        mutex_unlock(&devpts_root->d_inode->i_mutex);
        return 0;
 }
@@ -178,7 +178,7 @@ struct tty_struct *devpts_get_tty(int number)
                dput(dentry);
        }
-        up(&devpts_root->d_inode->i_sem);
+        mutex_unlock(&devpts_root->d_inode->i_mutex);
        return tty;
 }
@@ -196,7 +196,7 @@ void devpts_pty_kill(int number)
                }
                dput(dentry);
        }
-        up(&devpts_root->d_inode->i_sem);
+        mutex_unlock(&devpts_root->d_inode->i_mutex);
 }
 static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3931e7f1e6bf..30dbbd1df511 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -56,7 +56,7 @@
 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
 * This determines whether we need to do the fancy locking which prevents
 * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_sem is
+ * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
 * not held for the entire direct write (taken briefly, initially, during a
 * direct read though, but its never held for the duration of a direct-IO).
 */
@@ -930,7 +930,7 @@ out:
 }
 /*
- * Releases both i_sem and i_alloc_sem
+ * Releases both i_mutex and i_alloc_sem
 */
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
@@ -1062,11 +1062,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
        /*
         * All block lookups have been performed. For READ requests
-         * we can let i_sem go now that its achieved its purpose
+         * we can let i_mutex go now that it has achieved its purpose
         * of protecting us from looking up uninitialized blocks.
         */
        if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
-                up(&dio->inode->i_sem);
+                mutex_unlock(&dio->inode->i_mutex);
        /*
         * OK, all BIOs are submitted, so we can decrement bio_count to truly
@@ -1145,18 +1145,18 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 * The locking rules are governed by the dio_lock_type parameter.
 *
 * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_sem is not held on entry; it is never taken.
+ * For writes, i_mutex is not held on entry; it is never taken.
 *
 * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_sem and return with i_sem held, even though
+ * For writes we are called under i_mutex and return with i_mutex held, even though
 * it is internally dropped.
- * For reads, i_sem is not held on entry, but it is taken and dropped before
+ * For reads, i_mutex is not held on entry, but it is taken and dropped before
 * returning.
 *
 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
 *      uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_sem, return without it, never touch it.
+ * For writes we are called without i_mutex, return without it, never touch it.
- * For reads, i_sem is held on entry and will be released before returning.
+ * For reads, i_mutex is held on entry and will be released before returning.
 *
 * Additional i_alloc_sem locking requirements described inline below.
 */
@@ -1214,11 +1214,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         * For block device access DIO_NO_LOCKING is used,
         *      neither readers nor writers do any locking at all
         * For regular files using DIO_LOCKING,
-         *      readers need to grab i_sem and i_alloc_sem
+         *      readers need to grab i_mutex and i_alloc_sem
-         *      writers need to grab i_alloc_sem only (i_sem is already held)
+         *      writers need to grab i_alloc_sem only (i_mutex is already held)
         * For regular files using DIO_OWN_LOCKING,
         *      neither readers nor writers take any locks here
-         *      (i_sem is already held and release for writers here)
+         *      (i_mutex is already held and released for writers here)
         */
        dio->lock_type = dio_lock_type;
        if (dio_lock_type != DIO_NO_LOCKING) {
@@ -1228,7 +1228,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                        mapping = iocb->ki_filp->f_mapping;
                        if (dio_lock_type != DIO_OWN_LOCKING) {
-                                down(&inode->i_sem);
+                                mutex_lock(&inode->i_mutex);
                                reader_with_isem = 1;
                        }
@@ -1240,7 +1240,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                        }
                        if (dio_lock_type == DIO_OWN_LOCKING) {
-                                up(&inode->i_sem);
+                                mutex_unlock(&inode->i_mutex);
                                reader_with_isem = 0;
                        }
                }
@@ -1266,7 +1266,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 out:
        if (reader_with_isem)
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
        if (rw & WRITE)
                current->flags &= ~PF_SYNCWRITE;
        return retval;
diff --git a/fs/dquot.c b/fs/dquot.c
index 2a62b3dc20ec..1966c890b48d 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -77,6 +77,7 @@
 #include <linux/kmod.h>
 #include <linux/namei.h>
 #include <linux/buffer_head.h>
+#include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <asm/uaccess.h>
@@ -100,7 +101,7 @@
 * operation is just reading pointers from inode (or not using them at all) the
 * read lock is enough. If pointers are altered function must hold write lock
 * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
- * for altering the flag i_sem is also needed).  If operation is holding
+ * for altering the flag i_mutex is also needed).  If operation is holding
 * reference to dquot in other way (e.g. quotactl ops) it must be guarded by
 * dqonoff_sem.
 * This locking assures that:
@@ -117,9 +118,9 @@
 * spinlock to internal buffers before writing.
 *
 * Lock ordering (including related VFS locks) is the following:
- *   i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
+ *   i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
 *   > dquot->dq_lock > dqio_sem
- * i_sem on quota files is special (it's below dqio_sem)
+ * i_mutex on quota files is special (it's below dqio_sem)
 */
 static DEFINE_SPINLOCK(dq_list_lock);
@@ -1369,11 +1370,11 @@ int vfs_quota_off(struct super_block *sb, int type)
                        /* If quota was reenabled in the meantime, we have
                         * nothing to do */
                        if (!sb_has_quota_enabled(sb, cnt)) {
-                                down(&toputinode[cnt]->i_sem);
+                                mutex_lock(&toputinode[cnt]->i_mutex);
                                toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
                                  S_NOATIME | S_NOQUOTA);
                                truncate_inode_pages(&toputinode[cnt]->i_data, 0);
-                                up(&toputinode[cnt]->i_sem);
+                                mutex_unlock(&toputinode[cnt]->i_mutex);
                                mark_inode_dirty(toputinode[cnt]);
                                iput(toputinode[cnt]);
                        }
@@ -1417,7 +1418,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
        write_inode_now(inode, 1);
        /* And now flush the block cache so that kernel sees the changes */
        invalidate_bdev(sb->s_bdev, 0);
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        down(&dqopt->dqonoff_sem);
        if (sb_has_quota_enabled(sb, type)) {
                error = -EBUSY;
@@ -1449,7 +1450,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
                goto out_file_init;
        }
        up(&dqopt->dqio_sem);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        set_enable_flags(dqopt, type);
        add_dquot_ref(sb, type);
@@ -1470,7 +1471,7 @@ out_lock:
                inode->i_flags |= oldflags;
                up_write(&dqopt->dqptr_sem);
        }
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 out_fmt:
        put_quota_format(fmt);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644
index 000000000000..4e4762389bdc
--- /dev/null
+++ b/fs/drop_caches.c
@@ -0,0 +1,68 @@
+/*
+ * Implement the manual drop-all-pagecache function
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/sysctl.h>
+#include <linux/gfp.h>
+/*
+ * A global variable is a bit ugly, but it keeps the code simple.
+ * drop_caches_sysctl_handler() tests value 1 (bit 0) to decide whether to
+ * call drop_pagecache() and value 2 (bit 1) to decide whether to call
+ * drop_slab(); both bits may be set at once.
+ */
+int sysctl_drop_caches;
+/*
+ * Walk sb->s_inodes and call invalidate_inode_pages() on the mapping of
+ * every inode whose i_state has neither I_FREEING nor I_WILL_FREE set.
+ * inode_lock is held across the whole walk.
+ */
+static void drop_pagecache_sb(struct super_block *sb)
+{
+        struct inode *ino;
+        spin_lock(&inode_lock);
+        list_for_each_entry(ino, &sb->s_inodes, i_sb_list) {
+                if (!(ino->i_state & (I_FREEING|I_WILL_FREE)))
+                        invalidate_inode_pages(ino->i_mapping);
+        }
+        spin_unlock(&inode_lock);
+}
+/*
+ * Call drop_pagecache_sb() on every entry of the super_blocks list that
+ * has a non-NULL s_root.
+ *
+ * sb_lock is held while walking the list but dropped around the per-sb
+ * work; s_count is bumped first (presumably to pin the super_block while
+ * sb_lock is not held - confirm against __put_super_and_need_restart()).
+ */
+void drop_pagecache(void)
+{
+        struct super_block *sb;
+        spin_lock(&sb_lock);
+restart:
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                /* Take a count on sb before letting go of sb_lock. */
+                sb->s_count++;
+                spin_unlock(&sb_lock);
+                /* The per-sb work runs with s_umount held for reading. */
+                down_read(&sb->s_umount);
+                if (sb->s_root)
+                        drop_pagecache_sb(sb);
+                up_read(&sb->s_umount);
+                spin_lock(&sb_lock);
+                /*
+                 * Drop our count; a non-zero return means the walk must
+                 * start over from the head of the list.
+                 */
+                if (__put_super_and_need_restart(sb))
+                        goto restart;
+        }
+        spin_unlock(&sb_lock);
+}
+/*
+ * Call shrink_slab(1000, GFP_KERNEL, 1000) over and over, stopping after
+ * the first call whose return value is 10 or less.
+ */
+void drop_slab(void)
+{
+        for (;;) {
+                int shrunk = shrink_slab(1000, GFP_KERNEL, 1000);
+                if (shrunk <= 10)
+                        break;
+        }
+}
+/*
+ * sysctl handler for sysctl_drop_caches.
+ *
+ * The value is read or written through proc_dointvec_minmax().  If that
+ * call fails its error is returned and nothing is dropped, so a rejected
+ * write can never trigger a drop based on a stale sysctl_drop_caches.
+ * After a successful write, value 1 (bit 0) calls drop_pagecache() and
+ * value 2 (bit 1) calls drop_slab(); both may be set.
+ *
+ * Returns 0 on success, or the non-zero result of proc_dointvec_minmax().
+ */
+int drop_caches_sysctl_handler(ctl_table *table, int write,
+        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+        int ret;
+        ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        if (ret)
+                return ret;
+        if (write) {
+                if (sysctl_drop_caches & 1)
+                        drop_pagecache();
+                if (sysctl_drop_caches & 2)
+                        drop_slab();
+        }
+        return 0;
+}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d8d5ea9a9997..afc4891feb36 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -222,12 +222,13 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) {
                        sblock);
 #endif
        }
-        return(sblock);
+        return sblock;
 }
 static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) {
-        if (!IS_EFS_MAGIC(be32_to_cpu(super->fs_magic))) return -1;
+        if (!IS_EFS_MAGIC(be32_to_cpu(super->fs_magic)))
+                return -1;
        sb->fs_magic     = be32_to_cpu(super->fs_magic);
        sb->total_blocks = be32_to_cpu(super->fs_size);
diff --git a/fs/exec.c b/fs/exec.c
index e75a9548da8e..055378d2513e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -477,7 +477,7 @@ struct file *open_exec(const char *name)
        int err;
        struct file *file;
-        err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
+        err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ);
        file = ERR_PTR(err);
        if (!err) {
@@ -575,7 +575,7 @@ static int exec_mmap(struct mm_struct *mm)
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGHAND option to clone().)
 */
-static inline int de_thread(struct task_struct *tsk)
+static int de_thread(struct task_struct *tsk)
 {
        struct signal_struct *sig = tsk->signal;
        struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
@@ -632,10 +632,10 @@ static inline int de_thread(struct task_struct *tsk)
                 * synchronize with any firing (by calling del_timer_sync)
                 * before we can safely let the old group leader die.
                 */
-                sig->real_timer.data = (unsigned long)current;
+                sig->real_timer.data = current;
                spin_unlock_irq(lock);
-                if (del_timer_sync(&sig->real_timer))
+                if (hrtimer_cancel(&sig->real_timer))
-                        add_timer(&sig->real_timer);
+                        hrtimer_restart(&sig->real_timer);
                spin_lock_irq(lock);
        }
        while (atomic_read(&sig->count) > count) {
@@ -760,7 +760,7 @@ no_thread_group:
                spin_lock(&oldsighand->siglock);
                spin_lock(&newsighand->siglock);
-                current->sighand = newsighand;
+                rcu_assign_pointer(current->sighand, newsighand);
                recalc_sigpending();
                spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
                write_unlock_irq(&tasklist_lock);
                if (atomic_dec_and_test(&oldsighand->count))
-                        kmem_cache_free(sighand_cachep, oldsighand);
+                        sighand_free(oldsighand);
        }
        BUG_ON(!thread_group_leader(current));
@@ -780,7 +780,7 @@ no_thread_group:
 * so that a new one can be started
 */
-static inline void flush_old_files(struct files_struct * files)
+static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
        struct fdtable *fdt;
@@ -964,7 +964,7 @@ int prepare_binprm(struct linux_binprm *bprm)
 EXPORT_SYMBOL(prepare_binprm);
-static inline int unsafe_exec(struct task_struct *p)
+static int unsafe_exec(struct task_struct *p)
 {
        int unsafe = 0;
        if (p->ptrace & PT_PTRACED) {
@@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
                current->signal->flags = SIGNAL_GROUP_EXIT;
                current->signal->group_exit_code = exit_code;
+                current->signal->group_stop_count = 0;
                retval = 0;
        }
        spin_unlock_irq(&current->sighand->siglock);
@@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         * Clear any false indication of pending signals that might
         * be seen by the filesystem code called to write the core file.
         */
-        current->signal->group_stop_count = 0;
        clear_thread_flag(TIF_SIGPENDING);
        if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
-        if (do_truncate(file->f_dentry, 0, file) != 0)
+        if (do_truncate(file->f_dentry, 0, 0, file) != 0)
                goto close_fail;
        retval = binfmt->core_dump(signr, regs, file);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c49d6254379a..b06b54f1bbbb 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -11,6 +11,33 @@ struct export_operations export_op_default;
 #define dprintk(fmt, args...) do{}while(0)
+/*
+ * Search the d_alias list of result->d_inode for a dentry other than
+ * @result for which acceptable(context, dentry) returns non-zero.
+ *
+ * On a match, the reference to @result is dropped with dput() and the
+ * matching dentry is returned still carrying the reference taken by
+ * dget_locked().  If nothing matches, NULL is returned and @result is
+ * not dput() here.
+ */
+static struct dentry *
+find_acceptable_alias(struct dentry *result,
+                int (*acceptable)(void *context, struct dentry *dentry),
+                void *context)
+{
+        struct dentry *dentry, *toput = NULL;
+        spin_lock(&dcache_lock);
+        list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
+                /* Take a reference before dropping dcache_lock. */
+                dget_locked(dentry);
+                spin_unlock(&dcache_lock);
+                /*
+                 * The previous candidate is released only now, after the
+                 * lock is dropped and the current one has been grabbed.
+                 */
+                if (toput)
+                        dput(toput);
+                /* acceptable() is called without dcache_lock held. */
+                if (dentry != result && acceptable(context, dentry)) {
+                        dput(result);
+                        return dentry;
+                }
+                spin_lock(&dcache_lock);
+                /* Keep our reference until the next iteration has moved on. */
+                toput = dentry;
+        }
+        spin_unlock(&dcache_lock);
+        /* Release the last candidate examined, if any. */
+        if (toput)
+                dput(toput);
+        return NULL;
+}
 /**
 * find_exported_dentry - helper routine to implement export_operations->decode_fh
 * @sb:         The &super_block identifying the filesystem
@@ -52,8 +79,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
        struct dentry *target_dir;
        int err;
        struct export_operations *nops = sb->s_export_op;
-        struct list_head *le, *head;
+        struct dentry *alias;
-        struct dentry *toput = NULL;
        int noprogress;
        char nbuf[NAME_MAX+1];
@@ -79,27 +105,10 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
                        /* there is no other dentry, so fail */
                        goto err_result;
                }
-                /* try any other aliases */
-                spin_lock(&dcache_lock);
+                alias = find_acceptable_alias(result, acceptable, context);
-                head = &result->d_inode->i_dentry;
+                if (alias)
-                list_for_each(le, head) {
+                        return alias;
-                        struct dentry *dentry = list_entry(le, struct dentry, d_alias);
-                        dget_locked(dentry);
-                        spin_unlock(&dcache_lock);
-                        if (toput)
-                                dput(toput);
-                        toput = NULL;
-                        if (dentry != result &&
-                            acceptable(context, dentry)) {
-                                dput(result);
-                                return dentry;
-                        }
-                        spin_lock(&dcache_lock);
-                        toput = dentry;
-                }
-                spin_unlock(&dcache_lock);
-                if (toput)
-                        dput(toput);
        }                       
        /* It's a directory, or we are required to confirm the file's
@@ -177,9 +186,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
                        struct dentry *ppd;
                        struct dentry *npd;
-                        down(&pd->d_inode->i_sem);
+                        mutex_lock(&pd->d_inode->i_mutex);
                        ppd = CALL(nops,get_parent)(pd);
-                        up(&pd->d_inode->i_sem);
+                        mutex_unlock(&pd->d_inode->i_mutex);
                        if (IS_ERR(ppd)) {
                                err = PTR_ERR(ppd);
@@ -201,9 +210,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
                                break;
                        }
                        dprintk("find_exported_dentry: found name: %s\n", nbuf);
-                        down(&ppd->d_inode->i_sem);
+                        mutex_lock(&ppd->d_inode->i_mutex);
                        npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-                        up(&ppd->d_inode->i_sem);
+                        mutex_unlock(&ppd->d_inode->i_mutex);
                        if (IS_ERR(npd)) {
                                err = PTR_ERR(npd);
                                dprintk("find_exported_dentry: lookup failed: %d\n", err);
@@ -242,9 +251,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
                struct dentry *nresult;
                err = CALL(nops,get_name)(target_dir, nbuf, result);
                if (!err) {
-                        down(&target_dir->d_inode->i_sem);
+                        mutex_lock(&target_dir->d_inode->i_mutex);
                        nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
-                        up(&target_dir->d_inode->i_sem);
+                        mutex_unlock(&target_dir->d_inode->i_mutex);
                        if (!IS_ERR(nresult)) {
                                if (nresult->d_inode) {
                                        dput(result);
@@ -258,26 +267,10 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
        /* now result is properly connected, it is our best bet */
        if (acceptable(context, result))
                return result;
-        /* one last try of the aliases.. */
-        spin_lock(&dcache_lock);
+        alias = find_acceptable_alias(result, acceptable, context);
-        toput = NULL;
+        if (alias)
-        head = &result->d_inode->i_dentry;
+                return alias;
-        list_for_each(le, head) {
-                struct dentry *dentry = list_entry(le, struct dentry, d_alias);
-                dget_locked(dentry);
-                spin_unlock(&dcache_lock);
-                if (toput) dput(toput);
-                if (dentry != result &&
-                    acceptable(context, dentry)) {
-                        dput(result);
-                        return dentry;
-                }
-                spin_lock(&dcache_lock);
-                toput = dentry;
-        }
-        spin_unlock(&dcache_lock);
-        if (toput)
-                dput(toput);
        /* drat - I just cannot find anything acceptable */
        dput(result);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 6af2f4130290..35acc43b897f 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -4,6 +4,7 @@
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 */
+#include <linux/capability.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -149,7 +150,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 }
 /*
- * inode->i_sem: don't care
+ * inode->i_mutex: don't care
 */
 static struct posix_acl *
 ext2_get_acl(struct inode *inode, int type)
@@ -211,7 +212,7 @@ ext2_get_acl(struct inode *inode, int type)
 }
 /*
- * inode->i_sem: down
+ * inode->i_mutex: down
 */
 static int
 ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
@@ -301,8 +302,8 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
 /*
 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
 *
- * dir->i_sem: down
+ * dir->i_mutex: down
- * inode->i_sem: up (access to inode is still exclusive)
+ * inode->i_mutex: up (access to inode is still exclusive)
 */
 int
 ext2_init_acl(struct inode *inode, struct inode *dir)
@@ -361,7 +362,7 @@ cleanup:
 * for directories) are added. There are no more bits available in the
 * file mode.
 *
- * inode->i_sem: down
+ * inode->i_mutex: down
 */
 int
 ext2_acl_chmod(struct inode *inode)
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index bb6908066494..2c00953d4b0b 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -16,6 +16,7 @@
 #include <linux/quotaops.h>
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
+#include <linux/capability.h>
 /*
 * balloc.c contains the blocks allocation and deallocation routines
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
index 20145b74623f..e9983a0dd396 100644
--- a/fs/ext2/bitmap.c
+++ b/fs/ext2/bitmap.c
@@ -7,8 +7,12 @@
 * Universite Pierre et Marie Curie (Paris VI)
 */
+#ifdef EXT2FS_DEBUG
 #include <linux/buffer_head.h>
+#include "ext2.h"
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
@@ -23,3 +27,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
                        nibblemap[(map->b_data[i] >> 4) & 0xf];
        return (sum);
 }
+#endif  /*  EXT2FS_DEBUG  */
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 5b5f52876b42..7442bdd1267a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -592,7 +592,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
                goto fail;
        }
        kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr, 0, chunk_size);
+        memset(kaddr, 0, chunk_size);
        de = (struct ext2_dir_entry_2 *)kaddr;
        de->name_len = 1;
        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e977f8566d14..00de0a7312a2 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -53,7 +53,7 @@ struct ext2_inode_info {
 #ifdef CONFIG_EXT2_FS_XATTR
        /*
         * Extended attributes can be read independently of the main file
-         * data. Taking i_sem even when reading would cause contention
+         * data. Taking i_mutex even when reading would cause contention
         * between readers of EAs and writers of regular file data, so
         * instead we synchronize on xattr_sem when reading or changing
         * EAs.
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 709d8676b962..3ca9afdf713d 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -8,6 +8,7 @@
 */
 #include "ext2.h"
+#include <linux/capability.h>
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <asm/current.h>
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index c5513953c825..ad1432a2a62e 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -83,10 +83,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
                if (!inode)
                        return ERR_PTR(-EACCES);
        }
-        if (inode)
+        return d_splice_alias(inode, dentry);
-                return d_splice_alias(inode, dentry);
-        d_add(dentry, inode);
-        return NULL;
 }
 struct dentry *ext2_get_parent(struct dentry *child)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 522fa70dd8ea..8d6819846fc9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1152,7 +1152,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
        struct buffer_head tmp_bh;
        struct buffer_head *bh;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -1189,7 +1189,7 @@ out:
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return len - towrite;
 }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 0099462d4271..a2ca3107d475 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -325,7 +325,7 @@ cleanup:
 /*
 * Inode operation listxattr()
 *
- * dentry->d_inode->i_sem: don't care
+ * dentry->d_inode->i_mutex: don't care
 */
 ssize_t
 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -389,10 +389,6 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
        ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
                  name_index, name, value, (long)value_len);
-        if (IS_RDONLY(inode))
-                return -EROFS;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
        if (value == NULL)
                value_len = 0;
        if (name == NULL)
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 52b30ee6a25f..f28a6a499c96 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
 #include <linux/ext2_fs.h>
@@ -38,8 +39,6 @@ ext2_xattr_trusted_get(struct inode *inode, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
        return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
                              buffer, size);
 }
@@ -50,8 +49,6 @@ ext2_xattr_trusted_set(struct inode *inode, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
        return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
                              value, size, flags);
 }
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 0c03ea131a94..f383e7c3a7b5 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -35,16 +35,10 @@ static int
 ext2_xattr_user_get(struct inode *inode, const char *name,
                    void *buffer, size_t size)
 {
-        int error;
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(inode->i_sb, XATTR_USER))
                return -EOPNOTSUPP;
-        error = permission(inode, MAY_READ, NULL);
-        if (error)
-                return error;
        return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
 }
@@ -52,18 +46,10 @@ static int
 ext2_xattr_user_set(struct inode *inode, const char *name,
                    const void *value, size_t size, int flags)
 {
-        int error;
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(inode->i_sb, XATTR_USER))
                return -EOPNOTSUPP;
-        if ( !S_ISREG(inode->i_mode) &&
-            (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-                return -EPERM;
-        error = permission(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
        return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
                              value, size, flags);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 3ac38266fc9e..47a9da2dfb4f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/ext3_fs.h>
@@ -152,7 +153,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 /*
 * Inode operation get_posix_acl().
 *
- * inode->i_sem: don't care
+ * inode->i_mutex: don't care
 */
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
@@ -216,7 +217,7 @@ ext3_get_acl(struct inode *inode, int type)
 /*
 * Set the access or default ACL of an inode.
 *
- * inode->i_sem: down unless called from ext3_new_inode
+ * inode->i_mutex: down unless called from ext3_new_inode
 */
 static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -306,8 +307,8 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
 /*
 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
 *
- * dir->i_sem: down
+ * dir->i_mutex: down
- * inode->i_sem: up (access to inode is still exclusive)
+ * inode->i_mutex: up (access to inode is still exclusive)
 */
 int
 ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
@@ -368,7 +369,7 @@ cleanup:
 * for directories) are added. There are no more bits available in the
 * file mode.
 *
- * inode->i_sem: down
+ * inode->i_mutex: down
 */
 int
 ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ae1148c24c53..6250fcdf14a1 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -13,6 +13,7 @@
 #include <linux/config.h>
 #include <linux/time.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
@@ -20,8 +21,6 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
-#include "bitmap.h"
 /*
 * balloc.c contains the blocks allocation and deallocation routines
 */
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 5b4ba3e246e6..cb16b4c5d5df 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,8 +7,11 @@
 * Universite Pierre et Marie Curie (Paris VI)
 */
+#ifdef EXT3FS_DEBUG
 #include <linux/buffer_head.h>
-#include "bitmap.h"
+#include "ext3_fs.h"
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
@@ -24,3 +27,6 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
                        nibblemap[(map->b_data[i] >> 4) & 0xf];
        return (sum);
 }
+#endif  /*  EXT3FS_DEBUG  */
diff --git a/fs/ext3/bitmap.h b/fs/ext3/bitmap.h
deleted file mode 100644
index 6ee503a6bb4e..000000000000
--- a/fs/ext3/bitmap.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext3/bitmap.c
- *
- * Copyright (C) 2005 Simtec Electronics
- *      Ben Dooks <ben@simtec.co.uk>
- *
-*/
-extern unsigned long ext3_count_free (struct buffer_head *, unsigned int );
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9e4a24376210..dc826464f313 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -26,7 +26,6 @@
 #include <asm/byteorder.h>
-#include "bitmap.h"
 #include "xattr.h"
 #include "acl.h"
@@ -651,7 +650,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
        /* Error cases - e2fsck has already cleaned up for us */
        if (ino > max_ino) {
                ext3_warning(sb, __FUNCTION__,
-                             "bad orphan ino %lu!  e2fsck was run?\n", ino);
+                             "bad orphan ino %lu!  e2fsck was run?", ino);
                goto out;
        }
@@ -660,7 +659,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
        bitmap_bh = read_inode_bitmap(sb, block_group);
        if (!bitmap_bh) {
                ext3_warning(sb, __FUNCTION__,
-                             "inode bitmap error for orphan %lu\n", ino);
+                             "inode bitmap error for orphan %lu", ino);
                goto out;
        }
@@ -672,7 +671,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
                        !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
                        NEXT_ORPHAN(inode) > max_ino) {
                ext3_warning(sb, __FUNCTION__,
-                             "bad orphan inode %lu!  e2fsck was run?\n", ino);
+                             "bad orphan inode %lu!  e2fsck was run?", ino);
                printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
                       bit, (unsigned long long)bitmap_bh->b_blocknr,
                       ext3_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 706d68608381..556cd5510078 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -9,6 +9,7 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
+#include <linux/capability.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/time.h>
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b3c690a3b54a..8bd8ac077704 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1005,10 +1005,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
                if (!inode)
                        return ERR_PTR(-EACCES);
        }
-        if (inode)
+        return d_splice_alias(inode, dentry);
-                return d_splice_alias(inode, dentry);
-        d_add(dentry, inode);
-        return NULL;
 }
@@ -1476,7 +1473,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
                if (levels && (dx_get_count(frames->entries) ==
                               dx_get_limit(frames->entries))) {
                        ext3_warning(sb, __FUNCTION__,
-                                     "Directory index full!\n");
+                                     "Directory index full!");
                        err = -ENOSPC;
                        goto cleanup;
                }
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 6104ad310507..1041dab6de2f 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb,
        unsigned start = le32_to_cpu(es->s_blocks_count);
        unsigned end = start + input->blocks_count;
        unsigned group = input->group;
-        unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
+        unsigned itend = input->inode_table + sbi->s_itb_per_group;
        unsigned overhead = ext3_bg_has_super(sb, group) ?
                (1 + ext3_bg_num_gdb(sb, group) +
                 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
@@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb,
        while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
                if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
                        ext3_warning(sb, __FUNCTION__,
-                                     "reserved GDT %ld missing grp %d (%ld)\n",
+                                     "reserved GDT %ld missing grp %d (%ld)",
                                     blk, grp,
                                     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
                        return -EINVAL;
@@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        if (EXT3_SB(sb)->s_sbh->b_blocknr !=
            le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
                ext3_warning(sb, __FUNCTION__,
-                        "won't resize using backup superblock at %llu\n",
+                        "won't resize using backup superblock at %llu",
                        (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
                return -EPERM;
        }
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        data = (__u32 *)dind->b_data;
        if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
                ext3_warning(sb, __FUNCTION__,
-                             "new group %u GDT block %lu not reserved\n",
+                             "new group %u GDT block %lu not reserved",
                             input->group, gdblock);
                err = -EINVAL;
                goto exit_dind;
@@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        for (res = 0; res < reserved_gdb; res++, blk++) {
                if (le32_to_cpu(*data) != blk) {
                        ext3_warning(sb, __FUNCTION__,
-                                     "reserved block %lu not at offset %ld\n",
+                                     "reserved block %lu not at offset %ld",
                                     blk, (long)(data - (__u32 *)dind->b_data));
                        err = -EINVAL;
                        goto exit_bh;
@@ -683,7 +683,7 @@ exit_err:
        if (err) {
                ext3_warning(sb, __FUNCTION__,
                             "can't update backup for group %d (err %d), "
-                             "forcing fsck on next reboot\n", group, err);
+                             "forcing fsck on next reboot", group, err);
                sbi->s_mount_state &= ~EXT3_VALID_FS;
                sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
                mark_buffer_dirty(sbi->s_sbh);
@@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
                ext3_warning(sb, __FUNCTION__,
-                             "Can't resize non-sparse filesystem further\n");
+                             "Can't resize non-sparse filesystem further");
                return -EPERM;
        }
@@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
                if (!EXT3_HAS_COMPAT_FEATURE(sb,
                                             EXT3_FEATURE_COMPAT_RESIZE_INODE)){
                        ext3_warning(sb, __FUNCTION__,
-                                     "No reserved GDT blocks, can't resize\n");
+                                     "No reserved GDT blocks, can't resize");
                        return -EPERM;
                }
                inode = iget(sb, EXT3_RESIZE_INO);
                if (!inode || is_bad_inode(inode)) {
                        ext3_warning(sb, __FUNCTION__,
-                                     "Error opening resize inode\n");
+                                     "Error opening resize inode");
                        iput(inode);
                        return -ENOENT;
                }
@@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        }
        lock_super(sb);
-        if (input->group != EXT3_SB(sb)->s_groups_count) {
+        if (input->group != sbi->s_groups_count) {
                ext3_warning(sb, __FUNCTION__,
-                             "multiple resizers run on filesystem!\n");
+                             "multiple resizers run on filesystem!");
                err = -EBUSY;
                goto exit_journal;
        }
@@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
         * data.  So we need to be careful to set all of the relevant
         * group descriptor data etc. *before* we enable the group.
         *
-         * The key field here is EXT3_SB(sb)->s_groups_count: as long as
+         * The key field here is sbi->s_groups_count: as long as
         * that retains its old value, nobody is going to access the new
         * group.
         *
@@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        smp_wmb();
        /* Update the global fs size fields */
-        EXT3_SB(sb)->s_groups_count++;
+        sbi->s_groups_count++;
        ext3_journal_dirty_metadata(handle, primary);
@@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        percpu_counter_mod(&sbi->s_freeinodes_counter,
                           EXT3_INODES_PER_GROUP(sb));
-        ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+        ext3_journal_dirty_metadata(handle, sbi->s_sbh);
        sb->s_dirt = 1;
 exit_journal:
@@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
        if (last == 0) {
                ext3_warning(sb, __FUNCTION__,
-                             "need to use ext2online to resize further\n");
+                             "need to use ext2online to resize further");
                return -EPERM;
        }
@@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
        lock_super(sb);
        if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
                ext3_warning(sb, __FUNCTION__,
-                             "multiple resizers run on filesystem!\n");
+                             "multiple resizers run on filesystem!");
                err = -EBUSY;
                goto exit_put;
        }
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4e6730622d90..56bf76586019 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -43,7 +43,8 @@
 #include "acl.h"
 #include "namei.h"
-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+                             unsigned long journal_devnum);
 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
                               int);
 static void ext3_commit_super (struct super_block * sb,
@@ -628,7 +629,7 @@ enum {
        Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
-        Opt_commit, Opt_journal_update, Opt_journal_inum,
+        Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
@@ -666,6 +667,7 @@ static match_table_t tokens = {
        {Opt_commit, "commit=%u"},
        {Opt_journal_update, "journal=update"},
        {Opt_journal_inum, "journal=%u"},
+        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
@@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data)
        return sb_block;
 }
-static int parse_options (char * options, struct super_block *sb,
+static int parse_options (char *options, struct super_block *sb,
-                          unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+                          unsigned long *inum, unsigned long *journal_devnum,
+                          unsigned long *n_blocks_count, int is_remount)
 {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
        char * p;
@@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb,
                                return 0;
                        *inum = option;
                        break;
+                case Opt_journal_dev:
+                        if (is_remount) {
+                                printk(KERN_ERR "EXT3-fs: cannot specify "
+                                       "journal on remount\n");
+                                return 0;
+                        }
+                        if (match_int(&args[0], &option))
+                                return 0;
+                        *journal_devnum = option;
+                        break;
                case Opt_noload:
                        set_opt (sbi->s_mount_opt, NOLOAD);
                        break;
@@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
        unsigned long logic_sb_block;
        unsigned long offset = 0;
        unsigned long journal_inum = 0;
+        unsigned long journal_devnum = 0;
        unsigned long def_mount_opts;
        struct inode *root;
        int blocksize;
@@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
        set_opt(sbi->s_mount_opt, RESERVATION);
-        if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
+        if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+                            NULL, 0))
                goto failed_mount;
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
         */
        if (!test_opt(sb, NOLOAD) &&
            EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-                if (ext3_load_journal(sb, es))
+                if (ext3_load_journal(sb, es, journal_devnum))
                        goto failed_mount2;
        } else if (journal_inum) {
                if (ext3_create_journal(sb, es, journal_inum))
@@ -1902,15 +1917,24 @@ out_bdev:
        return NULL;
 }
-static int ext3_load_journal(struct super_block * sb,
+static int ext3_load_journal(struct super_block *sb,
-                             struct ext3_super_block * es)
+                             struct ext3_super_block *es,
+                             unsigned long journal_devnum)
 {
        journal_t *journal;
        int journal_inum = le32_to_cpu(es->s_journal_inum);
-        dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+        dev_t journal_dev;
        int err = 0;
        int really_read_only;
+        if (journal_devnum &&
+            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+                printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+                        "numbers have changed\n");
+                journal_dev = new_decode_dev(journal_devnum);
+        } else
+                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
        really_read_only = bdev_read_only(sb->s_bdev);
        /*
@@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb,
        EXT3_SB(sb)->s_journal = journal;
        ext3_clear_journal_err(sb, es);
+        if (journal_devnum &&
+            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+                es->s_journal_dev = cpu_to_le32(journal_devnum);
+                sb->s_dirt = 1;
+                /* Make sure we flush the recovery flag to disk. */
+                ext3_commit_super(sb, es, 1);
+        }
        return 0;
 }
@@ -2116,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb)
 static void ext3_write_super (struct super_block * sb)
 {
-        if (down_trylock(&sb->s_lock) == 0)
+        /*
+         * mutex_trylock() returns nonzero when it takes the lock (the
+         * opposite convention to down_trylock()), so this still BUG()s
+         * only when s_lock could be acquired, i.e. nobody was holding it.
+         */
+        if (mutex_trylock(&sb->s_lock) != 0)
                 BUG();
         sb->s_dirt = 0;
 }
@@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
        /*
         * Allow the "check" option to be passed as a remount option.
         */
-        if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
+        if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
@@ -2567,7 +2601,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -2610,7 +2644,7 @@ out:
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        ext3_mark_inode_dirty(handle, inode);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return len - towrite;
 }
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 430de9f63be3..e8d60bf6b7df 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -140,7 +140,7 @@ ext3_xattr_handler(int name_index)
 /*
 * Inode operation listxattr()
 *
- * dentry->d_inode->i_sem: don't care
+ * dentry->d_inode->i_mutex: don't care
 */
 ssize_t
 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -946,10 +946,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
        };
        int error;
-        if (IS_RDONLY(inode))
-                return -EROFS;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
        if (!name)
                return -EINVAL;
        if (strlen(name) > 255)
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index f68bfd1cf519..86d91f1186dc 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
 #include <linux/ext3_jbd.h>
@@ -39,8 +40,6 @@ ext3_xattr_trusted_get(struct inode *inode, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
        return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
                              buffer, size);
 }
@@ -51,8 +50,6 @@ ext3_xattr_trusted_set(struct inode *inode, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
        return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
                              value, size, flags);
 }
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index e907cae7a07c..a85a0a17c4fd 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -37,16 +37,10 @@ static int
 ext3_xattr_user_get(struct inode *inode, const char *name,
                    void *buffer, size_t size)
 {
-        int error;
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(inode->i_sb, XATTR_USER))
                return -EOPNOTSUPP;
-        error = permission(inode, MAY_READ, NULL);
-        if (error)
-                return error;
        return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
 }
@@ -54,19 +48,10 @@ static int
 ext3_xattr_user_set(struct inode *inode, const char *name,
                    const void *value, size_t size, int flags)
 {
-        int error;
        if (strcmp(name, "") == 0)
                return -EINVAL;
        if (!test_opt(inode->i_sb, XATTR_USER))
                return -EOPNOTSUPP;
-        if ( !S_ISREG(inode->i_mode) &&
-            (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-                return -EPERM;
-        error = permission(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
        return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
                              value, size, flags);
 }
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 77c24fcf712a..1acc941245fb 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
        return dclus;
 }
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+             unsigned long *mapped_blocks)
 {
        struct super_block *sb = inode->i_sb;
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
        int cluster, offset;
        *phys = 0;
+        *mapped_blocks = 0;
        if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
-                if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits))
+                if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
                        *phys = sector + sbi->dir_start;
+                        *mapped_blocks = 1;
+                }
                return 0;
        }
        last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1))
@@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
        cluster = fat_bmap_cluster(inode, cluster);
        if (cluster < 0)
                return cluster;
-        else if (cluster)
+        else if (cluster) {
                *phys = fat_clus_to_blknr(sbi, cluster) + offset;
+                *mapped_blocks = sbi->sec_per_clus - offset;
+                if (*mapped_blocks > last_block - sector)
+                        *mapped_blocks = last_block - sector;
+        }
        return 0;
 }
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ba824964b9bb..db0de5c621c7 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
        if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
                return;
-        bh = sb_getblk(sb, phys);
+        bh = sb_find_get_block(sb, phys);
-        if (bh && !buffer_uptodate(bh)) {
+        if (bh == NULL || !buffer_uptodate(bh)) {
                for (sec = 0; sec < sbi->sec_per_clus; sec++)
                        sb_breadahead(sb, phys + sec);
        }
@@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
 {
        struct super_block *sb = dir->i_sb;
        sector_t phys, iblock;
-        int offset;
+        unsigned long mapped_blocks;
-        int err;
+        int err, offset;
 next:
        if (*bh)
@@ -77,7 +77,7 @@ next:
        *bh = NULL;
        iblock = *pos >> sb->s_blocksize_bits;
-        err = fat_bmap(dir, iblock, &phys);
+        err = fat_bmap(dir, iblock, &phys, &mapped_blocks);
        if (err || !phys)
                return -1;      /* beyond EOF or error */
@@ -418,7 +418,7 @@ EODir:
        return err;
 }
-EXPORT_SYMBOL(fat_search_long);
+EXPORT_SYMBOL_GPL(fat_search_long);
 struct fat_ioctl_filldir_callback {
        struct dirent __user *dirent;
@@ -729,13 +729,13 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
        buf.dirent = d1;
        buf.result = 0;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        ret = -ENOENT;
        if (!IS_DEADDIR(inode)) {
                ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir,
                                    short_only, both);
        }
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        if (ret >= 0)
                ret = buf.result;
        return ret;
@@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
        return -ENOENT;
 }
-EXPORT_SYMBOL(fat_get_dotdot_entry);
+EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
 /* See if directory is empty */
 int fat_dir_empty(struct inode *dir)
@@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir)
        return result;
 }
-EXPORT_SYMBOL(fat_dir_empty);
+EXPORT_SYMBOL_GPL(fat_dir_empty);
 /*
 * fat_subdirs counts the number of sub-directories of dir. It can be run
@@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name,
        return -ENOENT;
 }
-EXPORT_SYMBOL(fat_scan);
+EXPORT_SYMBOL_GPL(fat_scan);
 static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 {
@@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
        return 0;
 }
-EXPORT_SYMBOL(fat_remove_entries);
+EXPORT_SYMBOL_GPL(fat_remove_entries);
 static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
                              struct buffer_head **bhs, int nr_bhs)
@@ -1048,7 +1048,7 @@ error:
        return err;
 }
-EXPORT_SYMBOL(fat_alloc_new_dir);
+EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
 static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
                               int *nr_cluster, struct msdos_dir_entry **de,
@@ -1264,4 +1264,4 @@ error_remove:
        return err;
 }
-EXPORT_SYMBOL(fat_add_entries);
+EXPORT_SYMBOL_GPL(fat_add_entries);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 4164cd54c4d1..a1a9e0451217 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
                                sbi->prev_free = entry;
                                if (sbi->free_clusters != -1)
                                        sbi->free_clusters--;
+                                sb->s_dirt = 1;
                                cluster[idx_clus] = entry;
                                idx_clus++;
@@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
        /* Couldn't allocate the free entries */
        sbi->free_clusters = 0;
+        sb->s_dirt = 1;
        err = -ENOSPC;
 out:
@@ -509,7 +511,6 @@ out:
        }
        for (i = 0; i < nr_bhs; i++)
                brelse(bhs[i]);
-        fat_clusters_flush(sb);
        if (err && idx_clus)
                fat_free_clusters(inode, cluster[0]);
@@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster)
                }
                ops->ent_put(&fatent, FAT_ENT_FREE);
-                if (sbi->free_clusters != -1)
+                if (sbi->free_clusters != -1) {
                        sbi->free_clusters++;
+                        sb->s_dirt = 1;
+                }
                if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
                        if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -578,7 +581,7 @@ error:
        return err;
 }
-EXPORT_SYMBOL(fat_free_clusters);
+EXPORT_SYMBOL_GPL(fat_free_clusters);
 int fat_count_free_clusters(struct super_block *sb)
 {
@@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb)
                } while (fat_ent_next(sbi, &fatent));
        }
        sbi->free_clusters = free;
+        sb->s_dirt = 1;
        fatent_brelse(&fatent);
 out:
        unlock_fat(sbi);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7134403d5be2..e99c5a73b39e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -6,11 +6,13 @@
 *  regular file handling primitives for fat-based filesystems
 */
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/msdos_fs.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
                      unsigned int cmd, unsigned long arg)
@@ -40,7 +42,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                if (err)
                        return err;
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
                if (IS_RDONLY(inode)) {
                        err = -EROFS;
@@ -102,7 +104,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
                mark_inode_dirty(inode);
        up:
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
                return err;
        }
        default:
@@ -124,6 +126,24 @@ struct file_operations fat_file_operations = {
        .sendfile       = generic_file_sendfile,
 };
+/*
+ * Grow @inode to @size bytes.  The expansion itself is done by
+ * generic_cont_expand_simple(); on success the change/modify times are
+ * stamped, the inode is marked dirty and, for IS_SYNC() inodes, the
+ * range from the old size up to @size is handed to
+ * sync_page_range_nolock().  Returns 0 or the error from either helper.
+ */
+static int fat_cont_expand(struct inode *inode, loff_t size)
+{
+        struct address_space *mapping = inode->i_mapping;
+        /* Capture the old size before expanding; it bounds the synced range. */
+        const loff_t old_size = inode->i_size;
+        int ret = generic_cont_expand_simple(inode, size);
+        if (ret)
+                return ret;
+        inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+        mark_inode_dirty(inode);
+        if (!IS_SYNC(inode))
+                return 0;
+        return sync_page_range_nolock(inode, mapping, old_size,
+                                      size - old_size);
+}
 int fat_notify_change(struct dentry *dentry, struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -132,11 +152,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr)
        lock_kernel();
-        /* FAT cannot truncate to a longer file */
+        /*
+         * Expand the file here, because inode_setattr() updates
+         * ->i_size before calling ->truncate(), but FAT needs to
+         * fill the hole before that happens.
+         */
        if (attr->ia_valid & ATTR_SIZE) {
                if (attr->ia_size > inode->i_size) {
-                        error = -EPERM;
+                        error = fat_cont_expand(inode, attr->ia_size);
-                        goto out;
+                        if (error || attr->ia_valid == ATTR_SIZE)
+                                goto out;
+                        attr->ia_valid &= ~ATTR_SIZE;
                }
        }
@@ -173,7 +199,7 @@ out:
        return error;
 }
-EXPORT_SYMBOL(fat_notify_change);
+EXPORT_SYMBOL_GPL(fat_notify_change);
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a0f9b9fe1307..e7f4aa7fc686 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,10 +18,12 @@
 #include <linux/seq_file.h>
 #include <linux/msdos_fs.h>
 #include <linux/pagemap.h>
+#include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode)
        return err;
 }
+/*
+ * Map file block @iblock of @inode into @bh_result.
+ *
+ * On entry *max_blocks is the largest number of blocks the caller wants;
+ * whenever a mapping is made it is reduced to the number of blocks
+ * actually covered.  If the block is not mapped and @create is 0, the
+ * function returns 0 and leaves @bh_result and *max_blocks untouched.
+ * With @create set, only the block sitting exactly at ->mmu_private may
+ * be added (anything else is reported through fat_fs_panic() and fails
+ * with -EIO), and a new cluster is allocated when @iblock has offset 0
+ * within its cluster.
+ *
+ * Returns 0 on success or a negative error code.
+ */
-static int fat_get_block(struct inode *inode, sector_t iblock,
+static int __fat_get_blocks(struct inode *inode, sector_t iblock,
-                         struct buffer_head *bh_result, int create)
+                            unsigned long *max_blocks,
+                            struct buffer_head *bh_result, int create)
 {
         struct super_block *sb = inode->i_sb;
+        struct msdos_sb_info *sbi = MSDOS_SB(sb);
         sector_t phys;
-        int err;
+        unsigned long mapped_blocks;
+        int err, offset;
-        err = fat_bmap(inode, iblock, &phys);
+        err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
         if (err)
                 return err;
         if (phys) {
                 map_bh(bh_result, sb, phys);
+                *max_blocks = min(mapped_blocks, *max_blocks);
                 return 0;
         }
         if (!create)
                 return 0;
+        /* Extending is only allowed at the current end (->mmu_private). */
         if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
                 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
                              MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
                 return -EIO;
         }
-        if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) {
+        offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
+        if (!offset) {
+                /* TODO: multiple cluster allocation would be desirable. */
                 err = fat_add_cluster(inode);
                 if (err)
                         return err;
         }
-        MSDOS_I(inode)->mmu_private += sb->s_blocksize;
+        /* available blocks on this cluster */
-        err = fat_bmap(inode, iblock, &phys);
+        mapped_blocks = sbi->sec_per_clus - offset;
+        *max_blocks = min(mapped_blocks, *max_blocks);
+        MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+        err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
         if (err)
                 return err;
-        if (!phys)
+        BUG_ON(!phys);
-                BUG();
+        BUG_ON(*max_blocks != mapped_blocks);
         set_buffer_new(bh_result);
         map_bh(bh_result, sb, phys);
         return 0;
 }
+/*
+ * Multi-block mapping callback (passed to blockdev_direct_IO()).  Maps up
+ * to @max_blocks blocks starting at @iblock through __fat_get_blocks()
+ * and, on success, reports the size actually mapped, in bytes, through
+ * bh_result->b_size.  Returns 0 or the error from __fat_get_blocks().
+ */
+static int fat_get_blocks(struct inode *inode, sector_t iblock,
+                          unsigned long max_blocks,
+                          struct buffer_head *bh_result, int create)
+{
+        struct super_block *sb = inode->i_sb;
+        int ret;
+        /* max_blocks is clamped in place to the count really mapped. */
+        ret = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+        if (!ret)
+                bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+        return ret;
+}
+/*
+ * Single-block variant of __fat_get_blocks(): asks for at most one block
+ * at @iblock.  This is the get_block callback used by the page-based
+ * read/write paths and by _fat_bmap() in this file.
+ */
+static int fat_get_block(struct inode *inode, sector_t iblock,
+                         struct buffer_head *bh_result, int create)
+{
+        unsigned long max_blocks = 1;
+        return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+}
 static int fat_writepage(struct page *page, struct writeback_control *wbc)
 {
        return block_write_full_page(page, fat_get_block, wbc);
 }
+/* ->writepages: delegate to mpage_writepages(), mapping with fat_get_block(). */
+static int fat_writepages(struct address_space *mapping,
+                          struct writeback_control *wbc)
+{
+        return mpage_writepages(mapping, wbc, fat_get_block);
+}
 static int fat_readpage(struct file *file, struct page *page)
 {
-        return block_read_full_page(page, fat_get_block);
+        return mpage_readpage(page, fat_get_block);
+}
+/* ->readpages: hand the page list to mpage_readpages(), mapping with fat_get_block(). */
+static int fat_readpages(struct file *file, struct address_space *mapping,
+                         struct list_head *pages, unsigned nr_pages)
+{
+        return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
 }
 static int fat_prepare_write(struct file *file, struct page *page,
@@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page,
        return err;
 }
+/*
+ * ->direct_IO for FAT.  Writes reaching beyond ->mmu_private are refused
+ * with -EINVAL; everything else is passed to blockdev_direct_IO() with
+ * fat_get_blocks() as the block mapper.
+ */
+static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
+                             const struct iovec *iov,
+                             loff_t offset, unsigned long nr_segs)
+{
+        struct inode *inode = iocb->ki_filp->f_mapping->host;
+        if (rw == WRITE) {
+                /*
+                 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+                 * so ->mmu_private would have to be advanced to the block
+                 * boundary here.  That in turn requires filling the
+                 * remaining area or hole with NULs first, so writes past
+                 * ->mmu_private are rejected for now.
+                 */
+                loff_t end = offset + iov_length(iov, nr_segs);
+                if (end > MSDOS_I(inode)->mmu_private)
+                        return -EINVAL;
+        }
+        /*
+         * FAT needs DIO_LOCKING to avoid the race between
+         * fat_get_block() and ->truncate().
+         */
+        return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+                                  offset, nr_segs, fat_get_blocks, NULL);
+}
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
        return generic_block_bmap(mapping, block, fat_get_block);
@@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 static struct address_space_operations fat_aops = {
        .readpage       = fat_readpage,
+        .readpages      = fat_readpages,
        .writepage      = fat_writepage,
+        .writepages     = fat_writepages,
        .sync_page      = block_sync_page,
        .prepare_write  = fat_prepare_write,
        .commit_write   = fat_commit_write,
+        .direct_IO      = fat_direct_IO,
        .bmap           = _fat_bmap
 };
@@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos)
        spin_unlock(&sbi->inode_hash_lock);
 }
-EXPORT_SYMBOL(fat_attach);
+EXPORT_SYMBOL_GPL(fat_attach);
 void fat_detach(struct inode *inode)
 {
@@ -193,7 +272,7 @@ void fat_detach(struct inode *inode)
        spin_unlock(&sbi->inode_hash_lock);
 }
-EXPORT_SYMBOL(fat_detach);
+EXPORT_SYMBOL_GPL(fat_detach);
 struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
 {
@@ -347,7 +426,7 @@ out:
        return inode;
 }
-EXPORT_SYMBOL(fat_build_inode);
+EXPORT_SYMBOL_GPL(fat_build_inode);
 static void fat_delete_inode(struct inode *inode)
 {
@@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode)
        unlock_kernel();
 }
-static void fat_put_super(struct super_block *sb)
+/*
+ * ->write_super: clear the superblock dirty flag, then call
+ * fat_clusters_flush() unless MS_RDONLY is set on the superblock.
+ */
+static void fat_write_super(struct super_block *sb)
 {
-        struct msdos_sb_info *sbi = MSDOS_SB(sb);
+        sb->s_dirt = 0;
         if (!(sb->s_flags & MS_RDONLY))
                 fat_clusters_flush(sb);
+}
+static void fat_put_super(struct super_block *sb)
+{
+        struct msdos_sb_info *sbi = MSDOS_SB(sb);
        if (sbi->nls_disk) {
                unload_nls(sbi->nls_disk);
@@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode)
        return fat_write_inode(inode, 1);
 }
-EXPORT_SYMBOL(fat_sync_inode);
+EXPORT_SYMBOL_GPL(fat_sync_inode);
 static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
 static struct super_operations fat_sops = {
@@ -546,6 +630,7 @@ static struct super_operations fat_sops = {
        .write_inode    = fat_write_inode,
        .delete_inode   = fat_delete_inode,
        .put_super      = fat_put_super,
+        .write_super    = fat_write_super,
        .statfs         = fat_statfs,
        .clear_inode    = fat_clear_inode,
        .remount_fs     = fat_remount,
@@ -1347,7 +1432,7 @@ out_fail:
        return error;
 }
-EXPORT_SYMBOL(fat_fill_super);
+EXPORT_SYMBOL_GPL(fat_fill_super);
 int __init fat_cache_init(void);
 void fat_cache_destroy(void);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 2a0df2122f5d..32fb0a3f1da4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
        }
 }
-EXPORT_SYMBOL(fat_fs_panic);
+EXPORT_SYMBOL_GPL(fat_fs_panic);
 /* Flushes the number of free clusters on FAT32 */
 /* XXX: Need to write one per FSINFO block.  Currently only writes 1 */
@@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb)
                if (sbi->prev_free != -1)
                        fsinfo->next_cluster = cpu_to_le32(sbi->prev_free);
                mark_buffer_dirty(bh);
-                if (sb->s_flags & MS_SYNCHRONOUS)
-                        sync_dirty_buffer(bh);
        }
        brelse(bh);
 }
@@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
        *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9));
 }
-EXPORT_SYMBOL(fat_date_unix2dos);
+EXPORT_SYMBOL_GPL(fat_date_unix2dos);
 int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 {
@@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
        return err;
 }
-EXPORT_SYMBOL(fat_sync_bhs);
+EXPORT_SYMBOL_GPL(fat_sync_bhs);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 863b46e0d78a..5f96786d1c73 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/capability.h>
 #include <linux/dnotify.h>
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
@@ -35,7 +36,7 @@ void fastcall set_close_on_exec(unsigned int fd, int flag)
        spin_unlock(&files->file_lock);
 }
-static inline int get_close_on_exec(unsigned int fd)
+static int get_close_on_exec(unsigned int fd)
 {
        struct files_struct *files = current->files;
        struct fdtable *fdt;
@@ -457,11 +458,11 @@ static void send_sigio_to_task(struct task_struct *p,
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
-                        if (!send_group_sig_info(fown->signum, &si, p))
+                        if (!group_send_sig_info(fown->signum, &si, p))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
-                        send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
+                        group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
        }
 }
@@ -495,7 +496,7 @@ static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown)
 {
        if (sigio_perm(p, fown, SIGURG))
-                send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
+                group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
 }
 int send_sigurg(struct fown_struct *fown)
diff --git a/fs/fifo.c b/fs/fifo.c
index 5455916241f0..923371b753ab 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -35,7 +35,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
        int ret;
        ret = -ERESTARTSYS;
-        if (down_interruptible(PIPE_SEM(*inode)))
+        if (mutex_lock_interruptible(PIPE_MUTEX(*inode)))
                goto err_nolock_nocleanup;
        if (!inode->i_pipe) {
@@ -119,7 +119,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
        }
        /* Ok! */
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        return 0;
 err_rd:
@@ -139,7 +139,7 @@ err:
                free_pipe_info(inode);
 err_nocleanup:
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
 err_nolock_nocleanup:
        return ret;
diff --git a/fs/file_table.c b/fs/file_table.c
index c3a5e2fd663b..768b58167543 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -16,6 +16,7 @@
 #include <linux/eventpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/mount.h>
+#include <linux/capability.h>
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
@@ -117,7 +118,7 @@ EXPORT_SYMBOL(get_empty_filp);
 void fastcall fput(struct file *file)
 {
-        if (rcuref_dec_and_test(&file->f_count))
+        /* Drop one reference; call __fput() when the count reaches zero. */
+        if (atomic_dec_and_test(&file->f_count))
                 __fput(file);
 }
@@ -166,7 +167,7 @@ struct file fastcall *fget(unsigned int fd)
        rcu_read_lock();
        file = fcheck_files(files, fd);
        if (file) {
-                if (!rcuref_inc_lf(&file->f_count)) {
+                if (!atomic_inc_not_zero(&file->f_count)) {
                        /* File object ref couldn't be taken */
                        rcu_read_unlock();
                        return NULL;
@@ -198,7 +199,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
                rcu_read_lock();
                file = fcheck_files(files, fd);
                if (file) {
-                        if (rcuref_inc_lf(&file->f_count))
+                        if (atomic_inc_not_zero(&file->f_count))
                                *fput_needed = 1;
                        else
                                /* Didn't get the reference, someone's freed */
@@ -213,7 +214,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 void put_filp(struct file *file)
 {
-        if (rcuref_dec_and_test(&file->f_count)) {
+        if (atomic_dec_and_test(&file->f_count)) {
                security_file_free(file);
                file_kill(file);
                file_free(file);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index d0401dc68d41..6f5df1700e95 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -99,8 +99,8 @@ static int
 vxfs_immed_readpage(struct file *fp, struct page *pp)
 {
        struct vxfs_inode_info  *vip = VXFS_INO(pp->mapping->host);
-        u_int64_t               offset = pp->index << PAGE_CACHE_SHIFT;
+        u_int64_t       offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
-        caddr_t                 kaddr;
+        caddr_t         kaddr;
        kaddr = kmap(pp);
        memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e08ab4702d97..4526da8907c6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -21,18 +21,18 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 static kmem_cache_t *fuse_req_cachep;
-static inline struct fuse_conn *fuse_get_conn(struct file *file)
+/*
+ * Fetch the fuse_conn stored in file->private_data while holding
+ * fuse_lock.  Returns NULL when there is none or when it is not marked
+ * connected.
+ */
+static struct fuse_conn *fuse_get_conn(struct file *file)
 {
         struct fuse_conn *fc;
         spin_lock(&fuse_lock);
         fc = file->private_data;
-        if (fc && !fc->mounted)
+        if (fc && !fc->connected)
                 fc = NULL;
         spin_unlock(&fuse_lock);
         return fc;
 }
-static inline void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_req *req)
 {
        memset(req, 0, sizeof(*req));
        INIT_LIST_HEAD(&req->list);
@@ -53,7 +53,7 @@ void fuse_request_free(struct fuse_req *req)
        kmem_cache_free(fuse_req_cachep, req);
 }
-static inline void block_sigs(sigset_t *oldset)
+static void block_sigs(sigset_t *oldset)
 {
        sigset_t mask;
@@ -61,7 +61,7 @@ static inline void block_sigs(sigset_t *oldset)
        sigprocmask(SIG_BLOCK, &mask, oldset);
 }
-static inline void restore_sigs(sigset_t *oldset)
+/* Reinstall the signal mask previously saved in @oldset by block_sigs(). */
+static void restore_sigs(sigset_t *oldset)
 {
         sigprocmask(SIG_SETMASK, oldset, NULL);
 }
@@ -109,18 +109,24 @@ struct fuse_req *fuse_get_request(struct fuse_conn *fc)
        int intr;
        sigset_t oldset;
+        atomic_inc(&fc->num_waiting);
        block_sigs(&oldset);
        intr = down_interruptible(&fc->outstanding_sem);
        restore_sigs(&oldset);
-        return intr ? NULL : do_get_request(fc);
+        if (intr) {
+                atomic_dec(&fc->num_waiting);
+                return NULL;
+        }
+        return do_get_request(fc);
 }
 static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
 {
        spin_lock(&fuse_lock);
-        if (req->preallocated)
+        if (req->preallocated) {
+                atomic_dec(&fc->num_waiting);
                list_add(&req->list, &fc->unused_list);
-        else
+        } else
                fuse_request_free(req);
        /* If we are in debt decrease that first */
@@ -148,42 +154,23 @@ void fuse_release_background(struct fuse_req *req)
        spin_unlock(&fuse_lock);
 }
-static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
-{
-        int i;
-        struct fuse_init_out *arg = &req->misc.init_out;
-        if (arg->major != FUSE_KERNEL_VERSION)
-                fc->conn_error = 1;
-        else {
-                fc->minor = arg->minor;
-                fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
-        }
-        /* After INIT reply is received other requests can go
-           out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
-           up()s on outstanding_sem.  The last up() is done in
-           fuse_putback_request() */
-        for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
-                up(&fc->outstanding_sem);
-}
 /*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was interrupted (and not yet sent) or some error
- * occurred during communication with userspace, or the device file was
+ * occurred during communication with userspace, or the device file
- * closed.  It decreases the reference count for the request.  In case
+ * was closed.  In case of a background request the reference to the
- * of a background request the reference to the stored objects are
+ * stored objects are released.  The requester thread is woken up (if
- * released.  The requester thread is woken up (if still waiting), and
+ * still waiting), the 'end' callback is called if given, else the
- * finally the request is either freed or put on the unused_list
+ * reference to the request is released.
 *
 * Called with fuse_lock, unlocks it
 */
 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-        int putback;
+        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
-        req->finished = 1;
+        req->end = NULL;
-        putback = atomic_dec_and_test(&req->count);
+        list_del(&req->list);
+        req->state = FUSE_REQ_FINISHED;
        spin_unlock(&fuse_lock);
        if (req->background) {
                down_read(&fc->sbput_sem);
@@ -192,18 +179,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
                up_read(&fc->sbput_sem);
        }
        wake_up(&req->waitq);
-        if (req->in.h.opcode == FUSE_INIT)
+        if (end)
-                process_init_reply(fc, req);
+                end(fc, req);
-        else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
+        else
-                /* Special case for failed iget in CREATE */
+                fuse_put_request(fc, req);
-                u64 nodeid = req->in.h.nodeid;
-                __fuse_get_request(req);
-                fuse_reset_request(req);
-                fuse_send_forget(fc, req, nodeid, 1);
-                putback = 0;
-        }
-        if (putback)
-                fuse_putback_request(fc, req);
 }
 /*
@@ -254,14 +233,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
        spin_unlock(&fuse_lock);
        block_sigs(&oldset);
-        wait_event_interruptible(req->waitq, req->finished);
+        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
        restore_sigs(&oldset);
        spin_lock(&fuse_lock);
-        if (req->finished)
+        if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
                return;
-        req->out.h.error = -EINTR;
+        if (!req->interrupted) {
-        req->interrupted = 1;
+                req->out.h.error = -EINTR;
+                req->interrupted = 1;
+        }
        if (req->locked) {
                /* This is uninterruptible sleep, because data is
                   being copied to/from the buffers of req.  During
@@ -272,10 +253,10 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
                wait_event(req->waitq, !req->locked);
                spin_lock(&fuse_lock);
        }
-        if (!req->sent && !list_empty(&req->list)) {
+        if (req->state == FUSE_REQ_PENDING) {
                list_del(&req->list);
                __fuse_put_request(req);
-        } else if (!req->finished && req->sent)
+        } else if (req->state == FUSE_REQ_SENT)
                background_request(fc, req);
 }
@@ -310,6 +291,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
                        fc->outstanding_debt++;
        }
        list_add_tail(&req->list, &fc->pending);
+        req->state = FUSE_REQ_PENDING;
        wake_up(&fc->waitq);
 }
@@ -362,34 +344,12 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
        request_send_nowait(fc, req);
 }
-void fuse_send_init(struct fuse_conn *fc)
-{
-        /* This is called from fuse_read_super() so there's guaranteed
-           to be a request available */
-        struct fuse_req *req = do_get_request(fc);
-        struct fuse_init_in *arg = &req->misc.init_in;
-        arg->major = FUSE_KERNEL_VERSION;
-        arg->minor = FUSE_KERNEL_MINOR_VERSION;
-        req->in.h.opcode = FUSE_INIT;
-        req->in.numargs = 1;
-        req->in.args[0].size = sizeof(*arg);
-        req->in.args[0].value = arg;
-        req->out.numargs = 1;
-        /* Variable length arguement used for backward compatibility
-           with interface version < 7.5.  Rest of init_out is zeroed
-           by do_get_request(), so a short reply is not a problem */
-        req->out.argvar = 1;
-        req->out.args[0].size = sizeof(struct fuse_init_out);
-        req->out.args[0].value = &req->misc.init_out;
-        request_send_background(fc, req);
-}
 /*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * interrupted bail out.
 */
-static inline int lock_request(struct fuse_req *req)
+static int lock_request(struct fuse_req *req)
 {
        int err = 0;
        if (req) {
@@ -408,7 +368,7 @@ static inline int lock_request(struct fuse_req *req)
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
-static inline void unlock_request(struct fuse_req *req)
+static void unlock_request(struct fuse_req *req)
 {
        if (req) {
                spin_lock(&fuse_lock);
@@ -444,7 +404,7 @@ static void fuse_copy_init(struct fuse_copy_state *cs, int write,
 }
 /* Unmap and put previous page of userspace buffer */
-static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+static void fuse_copy_finish(struct fuse_copy_state *cs)
 {
        if (cs->mapaddr) {
                kunmap_atomic(cs->mapaddr, KM_USER0);
@@ -493,8 +453,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
 }
 /* Do as much copy to/from userspace buffer as we can */
-static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
-                               unsigned *size)
 {
        unsigned ncpy = min(*size, cs->len);
        if (val) {
@@ -514,8 +473,8 @@ static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
-static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
-                                 unsigned offset, unsigned count, int zeroing)
+                          unsigned offset, unsigned count, int zeroing)
 {
        if (page && zeroing && count < PAGE_SIZE) {
                void *mapaddr = kmap_atomic(page, KM_USER1);
@@ -597,7 +556,7 @@ static void request_wait(struct fuse_conn *fc)
        DECLARE_WAITQUEUE(wait, current);
        add_wait_queue_exclusive(&fc->waitq, &wait);
-        while (fc->mounted && list_empty(&fc->pending)) {
+        while (fc->connected && list_empty(&fc->pending)) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (signal_pending(current))
                        break;
@@ -637,14 +596,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
                goto err_unlock;
        request_wait(fc);
        err = -ENODEV;
-        if (!fc->mounted)
+        if (!fc->connected)
                goto err_unlock;
        err = -ERESTARTSYS;
        if (list_empty(&fc->pending))
                goto err_unlock;
        req = list_entry(fc->pending.next, struct fuse_req, list);
-        list_del_init(&req->list);
+        req->state = FUSE_REQ_READING;
+        list_move(&req->list, &fc->io);
        in = &req->in;
        reqsize = in->h.len;
@@ -677,8 +637,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
        if (!req->isreply)
                request_end(fc, req);
        else {
-                req->sent = 1;
+                req->state = FUSE_REQ_SENT;
-                list_add_tail(&req->list, &fc->processing);
+                list_move_tail(&req->list, &fc->processing);
                spin_unlock(&fuse_lock);
        }
        return reqsize;
@@ -766,17 +726,23 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
                goto err_finish;
        spin_lock(&fuse_lock);
+        err = -ENOENT;
+        if (!fc->connected)
+                goto err_unlock;
        req = request_find(fc, oh.unique);
        err = -EINVAL;
        if (!req)
                goto err_unlock;
-        list_del_init(&req->list);
        if (req->interrupted) {
-                request_end(fc, req);
+                spin_unlock(&fuse_lock);
                fuse_copy_finish(&cs);
+                spin_lock(&fuse_lock);
+                request_end(fc, req);
                return -ENOENT;
        }
+        list_move(&req->list, &fc->io);
        req->out.h = oh;
        req->locked = 1;
        cs.req = req;
@@ -830,19 +796,90 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
        return mask;
 }
-/* Abort all requests on the given list (pending or processing) */
+/*
+ * Abort all requests on the given list (pending or processing)
+ *
+ * This function releases and reacquires fuse_lock
+ */
 static void end_requests(struct fuse_conn *fc, struct list_head *head)
 {
        while (!list_empty(head)) {
                struct fuse_req *req;
                req = list_entry(head->next, struct fuse_req, list);
-                list_del_init(&req->list);
                req->out.h.error = -ECONNABORTED;
                request_end(fc, req);
                spin_lock(&fuse_lock);
        }
 }
+/*
+ * Abort requests under I/O
+ *
+ * The requests are set to interrupted and finished, and the request
+ * waiter is woken up.  This will make request_wait_answer() wait
+ * until the request is unlocked and then return.
+ *
+ * If the request is asynchronous, then the end function needs to be
+ * called after waiting for the request to be unlocked (if it was
+ * locked).
+ */
+static void end_io_requests(struct fuse_conn *fc)
+{
+        while (!list_empty(&fc->io)) {
+                struct fuse_req *req =
+                        list_entry(fc->io.next, struct fuse_req, list);
+                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
+                req->interrupted = 1;
+                req->out.h.error = -ECONNABORTED;
+                req->state = FUSE_REQ_FINISHED;
+                list_del_init(&req->list);
+                wake_up(&req->waitq);
+                if (end) {
+                        req->end = NULL;
+                        /* The end function will consume this reference */
+                        __fuse_get_request(req);
+                        spin_unlock(&fuse_lock);
+                        wait_event(req->waitq, !req->locked);
+                        end(fc, req);
+                        spin_lock(&fuse_lock);
+                }
+        }
+}
+/*
+ * Abort all requests.
+ *
+ * Emergency exit in case of a malicious or accidental deadlock, or
+ * just a hung filesystem.
+ *
+ * The same effect is usually achievable through killing the
+ * filesystem daemon and all users of the filesystem.  The exception
+ * is the combination of an asynchronous request and the tricky
+ * deadlock (see Documentation/filesystems/fuse.txt).
+ *
+ * During the aborting, progression of requests from the pending and
+ * processing lists onto the io list, and progression of new requests
+ * onto the pending list is prevented by fc->connected being false.
+ *
+ * Progression of requests under I/O to the processing list is
+ * prevented by the req->interrupted flag being true for these
+ * requests.  For this reason requests on the io list must be aborted
+ * first.
+ */
+void fuse_abort_conn(struct fuse_conn *fc)
+{
+        spin_lock(&fuse_lock);
+        if (fc->connected) {
+                fc->connected = 0;
+                end_io_requests(fc);
+                end_requests(fc, &fc->pending);
+                end_requests(fc, &fc->processing);
+                wake_up_all(&fc->waitq);
+        }
+        spin_unlock(&fuse_lock);
+}
 static int fuse_dev_release(struct inode *inode, struct file *file)
 {
        struct fuse_conn *fc;
@@ -853,9 +890,11 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
                fc->connected = 0;
                end_requests(fc, &fc->pending);
                end_requests(fc, &fc->processing);
-                fuse_release_conn(fc);
        }
        spin_unlock(&fuse_lock);
+        if (fc)
+                kobject_put(&fc->kobj);
        return 0;
 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 417bcee466f6..21fd59c7bc24 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -23,8 +23,7 @@
 /*
 * Calculate the time in jiffies until a dentry/attributes are valid
 */
-static inline unsigned long time_to_jiffies(unsigned long sec,
+static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
-                                            unsigned long nsec)
 {
        struct timespec ts = {sec, nsec};
        return jiffies + timespec_to_jiffies(&ts);
@@ -157,7 +156,7 @@ static int dir_alias(struct inode *inode)
        return 0;
 }
-static inline int invalid_nodeid(u64 nodeid)
+static int invalid_nodeid(u64 nodeid)
 {
        return !nodeid || nodeid == FUSE_ROOT_ID;
 }
@@ -166,7 +165,7 @@ static struct dentry_operations fuse_dentry_operations = {
        .d_revalidate   = fuse_dentry_revalidate,
 };
-static inline int valid_mode(int m)
+static int valid_mode(int m)
 {
        return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
                S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
@@ -763,13 +762,6 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
        return 0;
 }
-static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
-                                       struct inode *inode, loff_t pos,
-                                       size_t count)
-{
-        return fuse_send_read_common(req, file, inode, pos, count, 1);
-}
 static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 {
        int err;
@@ -793,7 +785,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
        }
        req->num_pages = 1;
        req->pages[0] = page;
-        nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
+        fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+        request_send(fc, req);
+        nbytes = req->out.args[0].size;
        err = req->out.h.error;
        fuse_put_request(fc, req);
        if (!err)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 05dedddf4289..a7ef5e716f3c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -113,6 +113,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
        return err;
 }
+/* Special case for failed iget in CREATE */
+static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+        u64 nodeid = req->in.h.nodeid;
+        fuse_reset_request(req);
+        fuse_send_forget(fc, req, nodeid, 1);
+}
 void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
                       u64 nodeid, struct inode *inode, int flags, int isdir)
 {
@@ -128,6 +136,8 @@ void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
        req->in.args[0].size = sizeof(struct fuse_release_in);
        req->in.args[0].value = inarg;
        request_send_background(fc, req);
+        if (!inode)
+                req->end = fuse_release_end;
        kfree(ff);
 }
@@ -240,38 +250,35 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
        return fuse_fsync_common(file, de, datasync, 0);
 }
-size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+void fuse_read_fill(struct fuse_req *req, struct file *file,
-                             struct inode *inode, loff_t pos, size_t count,
+                    struct inode *inode, loff_t pos, size_t count, int opcode)
-                             int isdir)
 {
-        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_file *ff = file->private_data;
-        struct fuse_read_in inarg;
+        struct fuse_read_in *inarg = &req->misc.read_in;
-        memset(&inarg, 0, sizeof(struct fuse_read_in));
+        inarg->fh = ff->fh;
-        inarg.fh = ff->fh;
+        inarg->offset = pos;
-        inarg.offset = pos;
+        inarg->size = count;
-        inarg.size = count;
+        req->in.h.opcode = opcode;
-        req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
        req->in.h.nodeid = get_node_id(inode);
        req->inode = inode;
        req->file = file;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(struct fuse_read_in);
-        req->in.args[0].value = &inarg;
+        req->in.args[0].value = inarg;
        req->out.argpages = 1;
        req->out.argvar = 1;
        req->out.numargs = 1;
        req->out.args[0].size = count;
-        request_send(fc, req);
-        return req->out.args[0].size;
 }
-static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
+static size_t fuse_send_read(struct fuse_req *req, struct file *file,
-                                    struct inode *inode, loff_t pos,
+                             struct inode *inode, loff_t pos, size_t count)
-                                    size_t count)
 {
-        return fuse_send_read_common(req, file, inode, pos, count, 0);
+        struct fuse_conn *fc = get_fuse_conn(inode);
+        fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+        request_send(fc, req);
+        return req->out.args[0].size;
 }
 static int fuse_readpage(struct file *file, struct page *page)
@@ -304,21 +311,33 @@ static int fuse_readpage(struct file *file, struct page *page)
        return err;
 }
-static int fuse_send_readpages(struct fuse_req *req, struct file *file,
+static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
-                               struct inode *inode)
 {
-        loff_t pos = page_offset(req->pages[0]);
+        int i;
-        size_t count = req->num_pages << PAGE_CACHE_SHIFT;
-        unsigned i;
+        fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */
-        req->out.page_zeroing = 1;
-        fuse_send_read(req, file, inode, pos, count);
        for (i = 0; i < req->num_pages; i++) {
                struct page *page = req->pages[i];
                if (!req->out.h.error)
                        SetPageUptodate(page);
+                else
+                        SetPageError(page);
                unlock_page(page);
        }
-        return req->out.h.error;
+        fuse_put_request(fc, req);
+}
+static void fuse_send_readpages(struct fuse_req *req, struct file *file,
+                                struct inode *inode)
+{
+        struct fuse_conn *fc = get_fuse_conn(inode);
+        loff_t pos = page_offset(req->pages[0]);
+        size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+        req->out.page_zeroing = 1;
+        req->end = fuse_readpages_end;
+        fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+        request_send_background(fc, req);
 }
 struct fuse_readpages_data {
@@ -338,12 +357,12 @@ static int fuse_readpages_fill(void *_data, struct page *page)
            (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
             (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
             req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-                int err = fuse_send_readpages(req, data->file, inode);
+                fuse_send_readpages(req, data->file, inode);
-                if (err) {
+                data->req = req = fuse_get_request(fc);
+                if (!req) {
                        unlock_page(page);
-                        return err;
+                        return -EINTR;
                }
-                fuse_reset_request(req);
        }
        req->pages[req->num_pages] = page;
        req->num_pages ++;
@@ -368,10 +387,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
                return -EINTR;
        err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
-        if (!err && data.req->num_pages)
+        if (!err)
-                err = fuse_send_readpages(data.req, file, inode);
+                fuse_send_readpages(data.req, file, inode);
-        fuse_put_request(fc, data.req);
-        fuse_invalidate_attr(inode); /* atime changed */
        return err;
 }
@@ -560,9 +577,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
        struct inode *inode = file->f_dentry->d_inode;
        ssize_t res;
        /* Don't allow parallel writes to the same file */
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        res = fuse_direct_io(file, buf, count, ppos, 1);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return res;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 74c8d098a14a..46cf933aa3bf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -94,6 +94,11 @@ struct fuse_out {
        /** Header returned from userspace */
        struct fuse_out_header h;
+        /*
+         * The following bitfields are not changed during the request
+         * processing
+         */
        /** Last argument is variable length (can be shorter than
            arg->size) */
        unsigned argvar:1;
@@ -111,12 +116,23 @@ struct fuse_out {
        struct fuse_arg args[3];
 };
+/** The request state */
+enum fuse_req_state {
+        FUSE_REQ_INIT = 0,
+        FUSE_REQ_PENDING,
+        FUSE_REQ_READING,
+        FUSE_REQ_SENT,
+        FUSE_REQ_FINISHED
+};
+struct fuse_conn;
 /**
 * A request to the client
 */
 struct fuse_req {
-        /** This can be on either unused_list, pending or processing
+        /** This can be on either unused_list, pending, processing or
-            lists in fuse_conn */
+            io lists in fuse_conn */
        struct list_head list;
        /** Entry on the background list */
@@ -125,6 +141,12 @@ struct fuse_req {
        /** refcount */
        atomic_t count;
+        /*
+         * The following bitfields are either set once before the
+         * request is queued or setting/clearing them is protected by
+         * fuse_lock
+         */
        /** True if the request has reply */
        unsigned isreply:1;
@@ -140,11 +162,8 @@ struct fuse_req {
        /** Data is being copied to/from the request */
        unsigned locked:1;
-        /** Request has been sent to userspace */
+        /** State of the request */
-        unsigned sent:1;
+        enum fuse_req_state state;
-        /** The request is finished */
-        unsigned finished:1;
        /** The request input */
        struct fuse_in in;
@@ -161,6 +180,7 @@ struct fuse_req {
                struct fuse_release_in release_in;
                struct fuse_init_in init_in;
                struct fuse_init_out init_out;
+                struct fuse_read_in read_in;
        } misc;
        /** page vector */
@@ -180,6 +200,9 @@ struct fuse_req {
        /** File used in the request (or NULL) */
        struct file *file;
+        /** Request completion callback */
+        void (*end)(struct fuse_conn *, struct fuse_req *);
 };
 /**
@@ -190,9 +213,6 @@ struct fuse_req {
 * unmounted.
 */
 struct fuse_conn {
-        /** Reference count */
-        int count;
        /** The user id for this mount */
        uid_t user_id;
@@ -217,6 +237,9 @@ struct fuse_conn {
        /** The list of requests being processed */
        struct list_head processing;
+        /** The list of requests under I/O */
+        struct list_head io;
        /** Requests put in the background (RELEASE or any other
            interrupted request) */
        struct list_head background;
@@ -238,14 +261,22 @@ struct fuse_conn {
        u64 reqctr;
        /** Mount is active */
-        unsigned mounted : 1;
+        unsigned mounted;
-        /** Connection established */
+        /** Connection established, cleared on umount, connection
-        unsigned connected : 1;
+            abort and device release */
+        unsigned connected;
-        /** Connection failed (version mismatch) */
+        /** Connection failed (version mismatch).  Cannot race with
+            setting other bitfields since it is only set once in INIT
+            reply, before any other request, and never cleared */
        unsigned conn_error : 1;
+        /*
+         * The following bitfields are only for optimization purposes
+         * and hence races in setting them will not cause malfunction
+         */
        /** Is fsync not implemented by fs? */
        unsigned no_fsync : 1;
@@ -273,21 +304,22 @@ struct fuse_conn {
        /** Is create not implemented by fs? */
        unsigned no_create : 1;
+        /** The number of requests waiting for completion */
+        atomic_t num_waiting;
        /** Negotiated minor version */
        unsigned minor;
        /** Backing dev info */
        struct backing_dev_info bdi;
-};
-static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
+        /** kobject */
-{
+        struct kobject kobj;
-        return (struct fuse_conn **) &sb->s_fs_info;
+};
-}
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
 {
-        return *get_fuse_conn_super_p(sb);
+        return sb->s_fs_info;
 }
 static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
@@ -295,6 +327,11 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
        return get_fuse_conn_super(inode->i_sb);
 }
+static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj)
+{
+        return container_of(obj, struct fuse_conn, kobj);
+}
 static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
 {
        return container_of(inode, struct fuse_inode, inode);
@@ -336,11 +373,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
                      unsigned long nodeid, u64 nlookup);
 /**
- * Send READ or READDIR request
+ * Initialize READ or READDIR request
 */
-size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+void fuse_read_fill(struct fuse_req *req, struct file *file,
-                             struct inode *inode, loff_t pos, size_t count,
+                    struct inode *inode, loff_t pos, size_t count, int opcode);
-                             int isdir);
 /**
 * Send OPEN or OPENDIR request
@@ -395,12 +431,6 @@ void fuse_init_symlink(struct inode *inode);
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
 /**
- * Check if the connection can be released, and if yes, then free the
- * connection structure
- */
-void fuse_release_conn(struct fuse_conn *fc);
-/**
 * Initialize the client device
 */
 int fuse_dev_init(void);
@@ -456,6 +486,9 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 */
 void fuse_release_background(struct fuse_req *req);
+/* Abort all requests */
+void fuse_abort_conn(struct fuse_conn *fc);
 /**
 * Get the attributes of a file
 */
@@ -465,8 +498,3 @@ int fuse_do_getattr(struct inode *inode);
 * Invalidate inode attributes
 */
 void fuse_invalidate_attr(struct inode *inode);
-/**
- * Send the INIT message
- */
-void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 04c80cc957a3..c755a0440a66 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -24,6 +24,13 @@ MODULE_LICENSE("GPL");
 spinlock_t fuse_lock;
 static kmem_cache_t *fuse_inode_cachep;
+static struct subsystem connections_subsys;
+struct fuse_conn_attr {
+        struct attribute attr;
+        ssize_t (*show)(struct fuse_conn *, char *);
+        ssize_t (*store)(struct fuse_conn *, const char *, size_t);
+};
 #define FUSE_SUPER_MAGIC 0x65735546
@@ -189,6 +196,11 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
        return inode;
 }
+static void fuse_umount_begin(struct super_block *sb)
+{
+        fuse_abort_conn(get_fuse_conn_super(sb));
+}
 static void fuse_put_super(struct super_block *sb)
 {
        struct fuse_conn *fc = get_fuse_conn_super(sb);
@@ -200,14 +212,13 @@ static void fuse_put_super(struct super_block *sb)
        spin_lock(&fuse_lock);
        fc->mounted = 0;
-        fc->user_id = 0;
+        fc->connected = 0;
-        fc->group_id = 0;
+        spin_unlock(&fuse_lock);
-        fc->flags = 0;
+        up_write(&fc->sbput_sem);
        /* Flush all readers on this fs */
        wake_up_all(&fc->waitq);
-        up_write(&fc->sbput_sem);
+        kobject_del(&fc->kobj);
-        fuse_release_conn(fc);
+        kobject_put(&fc->kobj);
-        spin_unlock(&fuse_lock);
 }
 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -356,8 +367,10 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
        return 0;
 }
-static void free_conn(struct fuse_conn *fc)
+static void fuse_conn_release(struct kobject *kobj)
 {
+        struct fuse_conn *fc = get_fuse_conn_kobj(kobj);
        while (!list_empty(&fc->unused_list)) {
                struct fuse_req *req;
                req = list_entry(fc->unused_list.next, struct fuse_req, list);
@@ -367,33 +380,28 @@ static void free_conn(struct fuse_conn *fc)
        kfree(fc);
 }
-/* Must be called with the fuse lock held */
-void fuse_release_conn(struct fuse_conn *fc)
-{
-        fc->count--;
-        if (!fc->count)
-                free_conn(fc);
-}
 static struct fuse_conn *new_conn(void)
 {
        struct fuse_conn *fc;
-        fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+        fc = kzalloc(sizeof(*fc), GFP_KERNEL);
-        if (fc != NULL) {
+        if (fc) {
                int i;
-                memset(fc, 0, sizeof(*fc));
                init_waitqueue_head(&fc->waitq);
                INIT_LIST_HEAD(&fc->pending);
                INIT_LIST_HEAD(&fc->processing);
+                INIT_LIST_HEAD(&fc->io);
                INIT_LIST_HEAD(&fc->unused_list);
                INIT_LIST_HEAD(&fc->background);
-                sema_init(&fc->outstanding_sem, 0);
+                sema_init(&fc->outstanding_sem, 1); /* One for INIT */
                init_rwsem(&fc->sbput_sem);
+                kobj_set_kset_s(fc, connections_subsys);
+                kobject_init(&fc->kobj);
+                atomic_set(&fc->num_waiting, 0);
                for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
                        struct fuse_req *req = fuse_request_alloc();
                        if (!req) {
-                                free_conn(fc);
+                                kobject_put(&fc->kobj);
                                return NULL;
                        }
                        list_add(&req->list, &fc->unused_list);
@@ -408,25 +416,32 @@ static struct fuse_conn *new_conn(void)
 static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
 {
        struct fuse_conn *fc;
+        int err;
+        err = -EINVAL;
        if (file->f_op != &fuse_dev_operations)
-                return ERR_PTR(-EINVAL);
+                goto out_err;
+        err = -ENOMEM;
        fc = new_conn();
-        if (fc == NULL)
+        if (!fc)
-                return ERR_PTR(-ENOMEM);
+                goto out_err;
        spin_lock(&fuse_lock);
-        if (file->private_data) {
+        err = -EINVAL;
-                free_conn(fc);
+        if (file->private_data)
-                fc = ERR_PTR(-EINVAL);
+                goto out_unlock;
-        } else {
-                file->private_data = fc;
+        kobject_get(&fc->kobj);
-                *get_fuse_conn_super_p(sb) = fc;
+        file->private_data = fc;
-                fc->mounted = 1;
-                fc->connected = 1;
-                fc->count = 2;
-        }
        spin_unlock(&fuse_lock);
        return fc;
+ out_unlock:
+        spin_unlock(&fuse_lock);
+        kobject_put(&fc->kobj);
+ out_err:
+        return ERR_PTR(err);
 }
 static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
@@ -445,16 +460,74 @@ static struct super_operations fuse_super_operations = {
        .read_inode     = fuse_read_inode,
        .clear_inode    = fuse_clear_inode,
        .put_super      = fuse_put_super,
+        .umount_begin   = fuse_umount_begin,
        .statfs         = fuse_statfs,
        .show_options   = fuse_show_options,
 };
+static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+        int i;
+        struct fuse_init_out *arg = &req->misc.init_out;
+        if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
+                fc->conn_error = 1;
+        else {
+                fc->minor = arg->minor;
+                fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+        }
+        /* After INIT reply is received other requests can go
+           out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+           up()s on outstanding_sem.  The last up() is done in
+           fuse_putback_request() */
+        for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+                up(&fc->outstanding_sem);
+        fuse_put_request(fc, req);
+}
+static void fuse_send_init(struct fuse_conn *fc)
+{
+        /* This is called from fuse_fill_super() so there's guaranteed
+           to be exactly one request available */
+        struct fuse_req *req = fuse_get_request(fc);
+        struct fuse_init_in *arg = &req->misc.init_in;
+        arg->major = FUSE_KERNEL_VERSION;
+        arg->minor = FUSE_KERNEL_MINOR_VERSION;
+        req->in.h.opcode = FUSE_INIT;
+        req->in.numargs = 1;
+        req->in.args[0].size = sizeof(*arg);
+        req->in.args[0].value = arg;
+        req->out.numargs = 1;
+        /* Variable length argument used for backward compatibility
+           with interface version < 7.5.  Rest of init_out is zeroed
+           by do_get_request(), so a short reply is not a problem */
+        req->out.argvar = 1;
+        req->out.args[0].size = sizeof(struct fuse_init_out);
+        req->out.args[0].value = &req->misc.init_out;
+        req->end = process_init_reply;
+        request_send_background(fc, req);
+}
+static unsigned long long conn_id(void)
+{
+        static unsigned long long ctr = 1;
+        unsigned long long val;
+        spin_lock(&fuse_lock);
+        val = ctr++;
+        spin_unlock(&fuse_lock);
+        return val;
+}
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct fuse_conn *fc;
        struct inode *root;
        struct fuse_mount_data d;
        struct file *file;
+        struct dentry *root_dentry;
        int err;
        if (!parse_fuse_opt((char *) data, &d))
@@ -482,23 +555,42 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
                fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+        /* Used by get_root_inode() */
+        sb->s_fs_info = fc;
        err = -ENOMEM;
        root = get_root_inode(sb, d.rootmode);
-        if (root == NULL)
+        if (!root)
                goto err;
-        sb->s_root = d_alloc_root(root);
+        root_dentry = d_alloc_root(root);
-        if (!sb->s_root) {
+        if (!root_dentry) {
                iput(root);
                goto err;
        }
+        err = kobject_set_name(&fc->kobj, "%llu", conn_id());
+        if (err)
+                goto err_put_root;
+        err = kobject_add(&fc->kobj);
+        if (err)
+                goto err_put_root;
+        sb->s_root = root_dentry;
+        spin_lock(&fuse_lock);
+        fc->mounted = 1;
+        fc->connected = 1;
+        spin_unlock(&fuse_lock);
        fuse_send_init(fc);
        return 0;
+ err_put_root:
+        dput(root_dentry);
 err:
-        spin_lock(&fuse_lock);
+        kobject_put(&fc->kobj);
-        fuse_release_conn(fc);
-        spin_unlock(&fuse_lock);
        return err;
 }
@@ -516,6 +608,69 @@ static struct file_system_type fuse_fs_type = {
        .kill_sb        = kill_anon_super,
 };
+static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page)
+{
+        return sprintf(page, "%i\n", atomic_read(&fc->num_waiting));
+}
+static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page,
+                                     size_t count)
+{
+        fuse_abort_conn(fc);
+        return count;
+}
+static struct fuse_conn_attr fuse_conn_waiting =
+        __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL);
+static struct fuse_conn_attr fuse_conn_abort =
+        __ATTR(abort, 0600, NULL, fuse_conn_abort_store);
+static struct attribute *fuse_conn_attrs[] = {
+        &fuse_conn_waiting.attr,
+        &fuse_conn_abort.attr,
+        NULL,
+};
+static ssize_t fuse_conn_attr_show(struct kobject *kobj,
+                                   struct attribute *attr,
+                                   char *page)
+{
+        struct fuse_conn_attr *fca =
+                container_of(attr, struct fuse_conn_attr, attr);
+        if (fca->show)
+                return fca->show(get_fuse_conn_kobj(kobj), page);
+        else
+                return -EACCES;
+}
+static ssize_t fuse_conn_attr_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *page, size_t count)
+{
+        struct fuse_conn_attr *fca =
+                container_of(attr, struct fuse_conn_attr, attr);
+        if (fca->store)
+                return fca->store(get_fuse_conn_kobj(kobj), page, count);
+        else
+                return -EACCES;
+}
+static struct sysfs_ops fuse_conn_sysfs_ops = {
+        .show   = &fuse_conn_attr_show,
+        .store  = &fuse_conn_attr_store,
+};
+static struct kobj_type ktype_fuse_conn = {
+        .release        = fuse_conn_release,
+        .sysfs_ops      = &fuse_conn_sysfs_ops,
+        .default_attrs  = fuse_conn_attrs,
+};
+static decl_subsys(fuse, NULL, NULL);
+static decl_subsys(connections, &ktype_fuse_conn, NULL);
 static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
                                 unsigned long flags)
 {
@@ -553,6 +708,34 @@ static void fuse_fs_cleanup(void)
        kmem_cache_destroy(fuse_inode_cachep);
 }
+static int fuse_sysfs_init(void)
+{
+        int err;
+        kset_set_kset_s(&fuse_subsys, fs_subsys);
+        err = subsystem_register(&fuse_subsys);
+        if (err)
+                goto out_err;
+        kset_set_kset_s(&connections_subsys, fuse_subsys);
+        err = subsystem_register(&connections_subsys);
+        if (err)
+                goto out_fuse_unregister;
+        return 0;
+ out_fuse_unregister:
+        subsystem_unregister(&fuse_subsys);
+ out_err:
+        return err;
+}
+static void fuse_sysfs_cleanup(void)
+{
+        subsystem_unregister(&connections_subsys);
+        subsystem_unregister(&fuse_subsys);
+}
 static int __init fuse_init(void)
 {
        int res;
@@ -569,8 +752,14 @@ static int __init fuse_init(void)
        if (res)
                goto err_fs_cleanup;
+        res = fuse_sysfs_init();
+        if (res)
+                goto err_dev_cleanup;
        return 0;
+ err_dev_cleanup:
+        fuse_dev_cleanup();
 err_fs_cleanup:
        fuse_fs_cleanup();
 err:
@@ -581,6 +770,7 @@ static void __exit fuse_exit(void)
 {
        printk(KERN_DEBUG "fuse exit\n");
+        fuse_sysfs_cleanup();
        fuse_fs_cleanup();
        fuse_dev_cleanup();
 }
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index 89450ae32228..f13f1494d4fe 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -64,7 +64,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
                else
                        e = rec - 1;
        } while (b <= e);
-        //printk("%d: %d,%d,%d\n", bnode->this, b, e, rec);
        if (rec != e && e >= 0) {
                len = hfs_brec_lenoff(bnode, e, &off);
                keylen = hfs_brec_keylen(bnode, e);
@@ -127,7 +126,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
        return res;
 invalid:
-        printk("HFS: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
+        printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
                height, bnode->height, bnode->type, nidx, parent);
        res = -EIO;
 release:
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 3d5cdc6847c0..a7a7d77f3fd3 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -198,7 +198,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
        // move down?
        if (!node->prev && !node->next) {
-                printk("hfs_btree_del_level\n");
+                printk(KERN_DEBUG "hfs_btree_del_level\n");
        }
        if (!node->parent) {
                tree->root = 0;
@@ -219,7 +219,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
        struct hfs_bnode *node;
        if (cnid >= tree->node_count) {
-                printk("HFS: request for non-existent node %d in B*Tree\n", cnid);
+                printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
                return NULL;
        }
@@ -242,7 +242,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
        loff_t off;
        if (cnid >= tree->node_count) {
-                printk("HFS: request for non-existent node %d in B*Tree\n", cnid);
+                printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
                return NULL;
        }
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 7d8fff2c25fc..5c87cf4801fc 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -362,7 +362,7 @@ again:
                end_off = hfs_bnode_read_u16(parent, end_rec_off);
                if (end_rec_off - end_off < diff) {
-                        printk("splitting index node...\n");
+                        printk(KERN_DEBUG "hfs: splitting index node...\n");
                        fd->bnode = parent;
                        new_node = hfs_bnode_split(fd);
                        if (IS_ERR(new_node))
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 394725efa1c8..7bb11edd1488 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -111,7 +111,7 @@ void hfs_btree_close(struct hfs_btree *tree)
                while ((node = tree->node_hash[i])) {
                        tree->node_hash[i] = node->next_hash;
                        if (atomic_read(&node->refcnt))
-                                printk("HFS: node %d:%d still has %d user(s)!\n",
+                                printk(KERN_ERR "hfs: node %d:%d still has %d user(s)!\n",
                                        node->tree->cnid, node->this, atomic_read(&node->refcnt));
                        hfs_bnode_free(node);
                        tree->node_hash_cnt--;
@@ -252,7 +252,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
                kunmap(*pagep);
                nidx = node->next;
                if (!nidx) {
-                        printk("create new bmap node...\n");
+                        printk(KERN_DEBUG "hfs: create new bmap node...\n");
                        next_node = hfs_bmap_new_bmap(node, idx);
                } else
                        next_node = hfs_bnode_find(tree, nidx);
@@ -292,7 +292,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
                hfs_bnode_put(node);
                if (!i) {
                        /* panic */;
-                        printk("HFS: unable to free bnode %u. bmap not found!\n", node->this);
+                        printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this);
                        return;
                }
                node = hfs_bnode_find(tree, i);
@@ -300,7 +300,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
                        return;
                if (node->type != HFS_NODE_MAP) {
                        /* panic */;
-                        printk("HFS: invalid bmap found! (%u,%d)\n", node->this, node->type);
+                        printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type);
                        hfs_bnode_put(node);
                        return;
                }
@@ -313,7 +313,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
        m = 1 << (~nidx & 7);
        byte = data[off];
        if (!(byte & m)) {
-                printk("HFS: trying to free free bnode %u(%d)\n", node->this, node->type);
+                printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type);
                kunmap(page);
                hfs_bnode_put(node);
                return;
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 2fcd679f0238..ba851576ebb1 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -184,7 +184,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
        type = rec.type;
        if (type != HFS_CDR_THD && type != HFS_CDR_FTH) {
-                printk("HFS-fs: Found bad thread record in catalog\n");
+                printk(KERN_ERR "hfs: found bad thread record in catalog\n");
                return -EIO;
        }
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index e1f24befba58..534e5a7480ef 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -81,12 +81,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        case 1:
                hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
                if (entry.type != HFS_CDR_THD) {
-                        printk("HFS: bad catalog folder thread\n");
+                        printk(KERN_ERR "hfs: bad catalog folder thread\n");
                        err = -EIO;
                        goto out;
                }
                //if (fd.entrylength < HFS_MIN_THREAD_SZ) {
-                //      printk("HFS: truncated catalog thread\n");
+                //      printk(KERN_ERR "hfs: truncated catalog thread\n");
                //      err = -EIO;
                //      goto out;
                //}
@@ -105,7 +105,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        for (;;) {
                if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) {
-                        printk("HFS: walked past end of dir\n");
+                        printk(KERN_ERR "hfs: walked past end of dir\n");
                        err = -EIO;
                        goto out;
                }
@@ -114,7 +114,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
                if (type == HFS_CDR_DIR) {
                        if (fd.entrylength < sizeof(struct hfs_cat_dir)) {
-                                printk("HFS: small dir entry\n");
+                                printk(KERN_ERR "hfs: small dir entry\n");
                                err = -EIO;
                                goto out;
                        }
@@ -123,7 +123,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                break;
                } else if (type == HFS_CDR_FIL) {
                        if (fd.entrylength < sizeof(struct hfs_cat_file)) {
-                                printk("HFS: small file entry\n");
+                                printk(KERN_ERR "hfs: small file entry\n");
                                err = -EIO;
                                goto out;
                        }
@@ -131,7 +131,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                    be32_to_cpu(entry.file.FlNum), DT_REG))
                                break;
                } else {
-                        printk("HFS: bad catalog entry type %d\n", type);
+                        printk(KERN_ERR "hfs: bad catalog entry type %d\n", type);
                        err = -EIO;
                        goto out;
                }
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index cc5dcd52e23d..18ce47ab1b71 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -35,9 +35,6 @@
 #define dprint(flg, fmt, args...) \
        if (flg & DBG_MASK) printk(fmt , ## args)
-#define hfs_warn(format, args...) printk(KERN_WARNING format , ## args)
-#define hfs_error(format, args...) printk(KERN_ERR format , ## args)
 /*
 * struct hfs_inode_info
 *
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d499393a8ae7..39fd85b9b916 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -95,7 +95,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
                } while (--i && nidx < tree->node_count);
                spin_unlock(&tree->hash_lock);
        }
-        //printk("releasepage: %lu,%x = %d\n", page->index, mask, res);
        return res ? try_to_free_buffers(page) : 0;
 }
@@ -547,13 +546,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
        if (atomic_read(&file->f_count) != 0)
                return 0;
        if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
                hfs_file_truncate(inode);
                //if (inode->i_flags & S_DEAD) {
                //      hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
                //      hfs_delete_inode(inode);
                //}
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
        }
        return 0;
 }
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 0a473f79c89f..b4651e128d7f 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -47,7 +47,7 @@ static int hfs_get_last_session(struct super_block *sb,
                        *start = (sector_t)te.cdte_addr.lba << 2;
                        return 0;
                }
-                printk(KERN_ERR "HFS: Invalid session number or type of track\n");
+                printk(KERN_ERR "hfs: invalid session number or type of track\n");
                return -EINVAL;
        }
        ms_info.addr_format = CDROM_LBA;
@@ -100,7 +100,7 @@ int hfs_mdb_get(struct super_block *sb)
        HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz);
        if (!size || (size & (HFS_SECTOR_SIZE - 1))) {
-                hfs_warn("hfs_fs: bad allocation block size %d\n", size);
+                printk(KERN_ERR "hfs: bad allocation block size %d\n", size);
                goto out_bh;
        }
@@ -117,7 +117,7 @@ int hfs_mdb_get(struct super_block *sb)
                size >>= 1;
        brelse(bh);
        if (!sb_set_blocksize(sb, size)) {
-                printk("hfs_fs: unable to set blocksize to %u\n", size);
+                printk(KERN_ERR "hfs: unable to set blocksize to %u\n", size);
                goto out;
        }
@@ -161,8 +161,8 @@ int hfs_mdb_get(struct super_block *sb)
        }
        if (!HFS_SB(sb)->alt_mdb) {
-                hfs_warn("hfs_fs: unable to locate alternate MDB\n");
+                printk(KERN_WARNING "hfs: unable to locate alternate MDB\n");
-                hfs_warn("hfs_fs: continuing without an alternate MDB\n");
+                printk(KERN_WARNING "hfs: continuing without an alternate MDB\n");
        }
        HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0);
@@ -177,7 +177,7 @@ int hfs_mdb_get(struct super_block *sb)
        while (size) {
                bh = sb_bread(sb, off >> sb->s_blocksize_bits);
                if (!bh) {
-                        hfs_warn("hfs_fs: unable to read volume bitmap\n");
+                        printk(KERN_ERR "hfs: unable to read volume bitmap\n");
                        goto out;
                }
                off2 = off & (sb->s_blocksize - 1);
@@ -191,23 +191,23 @@ int hfs_mdb_get(struct super_block *sb)
        HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp);
        if (!HFS_SB(sb)->ext_tree) {
-                hfs_warn("hfs_fs: unable to open extent tree\n");
+                printk(KERN_ERR "hfs: unable to open extent tree\n");
                goto out;
        }
        HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp);
        if (!HFS_SB(sb)->cat_tree) {
-                hfs_warn("hfs_fs: unable to open catalog tree\n");
+                printk(KERN_ERR "hfs: unable to open catalog tree\n");
                goto out;
        }
        attrib = mdb->drAtrb;
        if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
-                hfs_warn("HFS-fs warning: Filesystem was not cleanly unmounted, "
+                printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
                         "running fsck.hfs is recommended.  mounting read-only.\n");
                sb->s_flags |= MS_RDONLY;
        }
        if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) {
-                hfs_warn("HFS-fs: Filesystem is marked locked, mounting read-only.\n");
+                printk(KERN_WARNING "hfs: filesystem is marked locked, mounting read-only.\n");
                sb->s_flags |= MS_RDONLY;
        }
        if (!(sb->s_flags & MS_RDONLY)) {
@@ -303,7 +303,7 @@ void hfs_mdb_commit(struct super_block *sb)
                while (size) {
                        bh = sb_bread(sb, block);
                        if (!bh) {
-                                hfs_warn("hfs_fs: unable to read volume bitmap\n");
+                                printk(KERN_ERR "hfs: unable to read volume bitmap\n");
                                break;
                        }
                        len = min((int)sb->s_blocksize - off, size);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index c5074aeafcae..1181d116117d 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -101,12 +101,12 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
                return 0;
        if (!(*flags & MS_RDONLY)) {
                if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
-                        printk("HFS-fs warning: Filesystem was not cleanly unmounted, "
+                        printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
                               "running fsck.hfs is recommended.  leaving read-only.\n");
                        sb->s_flags |= MS_RDONLY;
                        *flags |= MS_RDONLY;
                } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) {
-                        printk("HFS-fs: Filesystem is marked locked, leaving read-only.\n");
+                        printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
                        sb->s_flags |= MS_RDONLY;
                        *flags |= MS_RDONLY;
                }
@@ -229,21 +229,21 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
                switch (token) {
                case opt_uid:
                        if (match_int(&args[0], &tmp)) {
-                                printk("HFS: uid requires an argument\n");
+                                printk(KERN_ERR "hfs: uid requires an argument\n");
                                return 0;
                        }
                        hsb->s_uid = (uid_t)tmp;
                        break;
                case opt_gid:
                        if (match_int(&args[0], &tmp)) {
-                                printk("HFS: gid requires an argument\n");
+                                printk(KERN_ERR "hfs: gid requires an argument\n");
                                return 0;
                        }
                        hsb->s_gid = (gid_t)tmp;
                        break;
                case opt_umask:
                        if (match_octal(&args[0], &tmp)) {
-                                printk("HFS: umask requires a value\n");
+                                printk(KERN_ERR "hfs: umask requires a value\n");
                                return 0;
                        }
                        hsb->s_file_umask = (umode_t)tmp;
@@ -251,39 +251,39 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
                        break;
                case opt_file_umask:
                        if (match_octal(&args[0], &tmp)) {
-                                printk("HFS: file_umask requires a value\n");
+                                printk(KERN_ERR "hfs: file_umask requires a value\n");
                                return 0;
                        }
                        hsb->s_file_umask = (umode_t)tmp;
                        break;
                case opt_dir_umask:
                        if (match_octal(&args[0], &tmp)) {
-                                printk("HFS: dir_umask requires a value\n");
+                                printk(KERN_ERR "hfs: dir_umask requires a value\n");
                                return 0;
                        }
                        hsb->s_dir_umask = (umode_t)tmp;
                        break;
                case opt_part:
                        if (match_int(&args[0], &hsb->part)) {
-                                printk("HFS: part requires an argument\n");
+                                printk(KERN_ERR "hfs: part requires an argument\n");
                                return 0;
                        }
                        break;
                case opt_session:
                        if (match_int(&args[0], &hsb->session)) {
-                                printk("HFS: session requires an argument\n");
+                                printk(KERN_ERR "hfs: session requires an argument\n");
                                return 0;
                        }
                        break;
                case opt_type:
                        if (match_fourchar(&args[0], &hsb->s_type)) {
-                                printk("HFS+-fs: type requires a 4 character value\n");
+                                printk(KERN_ERR "hfs: type requires a 4 character value\n");
                                return 0;
                        }
                        break;
                case opt_creator:
                        if (match_fourchar(&args[0], &hsb->s_creator)) {
-                                printk("HFS+-fs: creator requires a 4 character value\n");
+                                printk(KERN_ERR "hfs: creator requires a 4 character value\n");
                                return 0;
                        }
                        break;
@@ -292,13 +292,13 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
                        break;
                case opt_codepage:
                        if (hsb->nls_disk) {
-                                printk("HFS+-fs: unable to change codepage\n");
+                                printk(KERN_ERR "hfs: unable to change codepage\n");
                                return 0;
                        }
                        p = match_strdup(&args[0]);
                        hsb->nls_disk = load_nls(p);
                        if (!hsb->nls_disk) {
-                                printk("HFS+-fs: unable to load codepage \"%s\"\n", p);
+                                printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
                                kfree(p);
                                return 0;
                        }
@@ -306,13 +306,13 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
                        break;
                case opt_iocharset:
                        if (hsb->nls_io) {
-                                printk("HFS: unable to change iocharset\n");
+                                printk(KERN_ERR "hfs: unable to change iocharset\n");
                                return 0;
                        }
                        p = match_strdup(&args[0]);
                        hsb->nls_io = load_nls(p);
                        if (!hsb->nls_io) {
-                                printk("HFS: unable to load iocharset \"%s\"\n", p);
+                                printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
                                kfree(p);
                                return 0;
                        }
@@ -326,7 +326,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
        if (hsb->nls_disk && !hsb->nls_io) {
                hsb->nls_io = load_nls_default();
                if (!hsb->nls_io) {
-                        printk("HFS: unable to load default iocharset\n");
+                        printk(KERN_ERR "hfs: unable to load default iocharset\n");
                        return 0;
                }
        }
@@ -364,7 +364,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
        res = -EINVAL;
        if (!parse_options((char *)data, sbi)) {
-                hfs_warn("hfs_fs: unable to parse mount options.\n");
+                printk(KERN_ERR "hfs: unable to parse mount options.\n");
                goto bail;
        }
@@ -375,7 +375,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
        res = hfs_mdb_get(sb);
        if (res) {
                if (!silent)
-                        hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n",
+                        printk(KERN_WARNING "hfs: can't find a HFS filesystem on dev %s.\n",
                                hfs_mdb_name(sb));
                res = -EINVAL;
                goto bail;
@@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 bail_iput:
        iput(root_inode);
 bail_no_root:
-        hfs_warn("hfs_fs: get root inode failed.\n");
+        printk(KERN_ERR "hfs: get root inode failed.\n");
 bail:
        hfs_mdb_put(sb);
        return res;
@@ -454,7 +454,7 @@ static void __exit exit_hfs_fs(void)
 {
        unregister_filesystem(&hfs_fs_type);
        if (kmem_cache_destroy(hfs_inode_cachep))
-                printk(KERN_INFO "hfs_inode_cache: not all structures were freed\n");
+                printk(KERN_ERR "hfs_inode_cache: not all structures were freed\n");
 }
 module_init(init_hfs_fs)
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index 257cdde0514b..5007a41f1be9 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -64,7 +64,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
                else
                        e = rec - 1;
        } while (b <= e);
-        //printk("%d: %d,%d,%d\n", bnode->this, b, e, rec);
        if (rec != e && e >= 0) {
                len = hfs_brec_lenoff(bnode, e, &off);
                keylen = hfs_brec_keylen(bnode, e);
@@ -127,7 +126,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
        return res;
 invalid:
-        printk("HFS+-fs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
+        printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
                height, bnode->height, bnode->type, nidx, parent);
        res = -EIO;
 release:
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index c7d316455fa0..9fb51632303c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -29,7 +29,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
                return size;
        dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
-        down(&HFSPLUS_SB(sb).alloc_file->i_sem);
+        mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
        mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
        page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
                               (filler_t *)mapping->a_ops->readpage, NULL);
@@ -143,7 +143,7 @@ done:
        sb->s_dirt = 1;
        dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
 out:
-        up(&HFSPLUS_SB(sb).alloc_file->i_sem);
+        mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
        return start;
 }
@@ -164,7 +164,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
        if ((offset + count) > HFSPLUS_SB(sb).total_blocks)
                return -2;
-        down(&HFSPLUS_SB(sb).alloc_file->i_sem);
+        mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
        mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
        pnr = offset / PAGE_CACHE_BITS;
        page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL);
@@ -215,7 +215,7 @@ out:
        kunmap(page);
        HFSPLUS_SB(sb).free_blocks += len;
        sb->s_dirt = 1;
-        up(&HFSPLUS_SB(sb).alloc_file->i_sem);
+        mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
        return 0;
 }
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 930cd9212de8..8f07e8fbd03d 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -358,7 +358,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
        // move down?
        if (!node->prev && !node->next) {
-                printk("hfs_btree_del_level\n");
+                printk(KERN_DEBUG "hfs_btree_del_level\n");
        }
        if (!node->parent) {
                tree->root = 0;
@@ -379,7 +379,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
        struct hfs_bnode *node;
        if (cnid >= tree->node_count) {
-                printk("HFS+-fs: request for non-existent node %d in B*Tree\n", cnid);
+                printk(KERN_ERR "hfs: request for non-existent node %u in B*Tree\n", cnid);
                return NULL;
        }
@@ -402,7 +402,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
        loff_t off;
        if (cnid >= tree->node_count) {
-                printk("HFS+-fs: request for non-existent node %d in B*Tree\n", cnid);
+                printk(KERN_ERR "hfs: request for non-existent node %u in B*Tree\n", cnid);
                return NULL;
        }
@@ -576,8 +576,9 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
        node = hfs_bnode_findhash(tree, num);
        spin_unlock(&tree->hash_lock);
        if (node) {
-                printk("new node %u already hashed?\n", num);
+                printk(KERN_CRIT "new node %u already hashed?\n", num);
-                BUG();
+                WARN_ON(1);
+                return node;
        }
        node = __hfs_bnode_create(tree, num);
        if (!node)
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 0ccef2ab790c..c88e5d72a402 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -360,7 +360,7 @@ again:
                end_off = hfs_bnode_read_u16(parent, end_rec_off);
                if (end_rec_off - end_off < diff) {
-                        printk("splitting index node...\n");
+                        printk(KERN_DEBUG "hfs: splitting index node...\n");
                        fd->bnode = parent;
                        new_node = hfs_bnode_split(fd);
                        if (IS_ERR(new_node))
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 44326aa2bd34..a67edfa34e9e 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -31,17 +31,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
        init_MUTEX(&tree->tree_lock);
        spin_lock_init(&tree->hash_lock);
-        /* Set the correct compare function */
        tree->sb = sb;
        tree->cnid = id;
-        if (id == HFSPLUS_EXT_CNID) {
-                tree->keycmp = hfsplus_ext_cmp_key;
-        } else if (id == HFSPLUS_CAT_CNID) {
-                tree->keycmp = hfsplus_cat_cmp_key;
-        } else {
-                printk("HFS+-fs: unknown B*Tree requested\n");
-                goto free_tree;
-        }
        tree->inode = iget(sb, id);
        if (!tree->inode)
                goto free_tree;
@@ -64,6 +55,20 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
        tree->max_key_len = be16_to_cpu(head->max_key_len);
        tree->depth = be16_to_cpu(head->depth);
+        /* Set the correct compare function */
+        if (id == HFSPLUS_EXT_CNID) {
+                tree->keycmp = hfsplus_ext_cmp_key;
+        } else if (id == HFSPLUS_CAT_CNID) {
+                if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) &&
+                    (head->key_type == HFSPLUS_KEY_BINARY))
+                        tree->keycmp = hfsplus_cat_bin_cmp_key;
+                else
+                        tree->keycmp = hfsplus_cat_case_cmp_key;
+        } else {
+                printk(KERN_ERR "hfs: unknown B*Tree requested\n");
+                goto fail_page;
+        }
        size = tree->node_size;
        if (!size || size & (size - 1))
                goto fail_page;
@@ -99,7 +104,7 @@ void hfs_btree_close(struct hfs_btree *tree)
                while ((node = tree->node_hash[i])) {
                        tree->node_hash[i] = node->next_hash;
                        if (atomic_read(&node->refcnt))
-                                printk("HFS+: node %d:%d still has %d user(s)!\n",
+                                printk(KERN_CRIT "hfs: node %d:%d still has %d user(s)!\n",
                                        node->tree->cnid, node->this, atomic_read(&node->refcnt));
                        hfs_bnode_free(node);
                        tree->node_hash_cnt--;
@@ -223,10 +228,6 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
                                                tree->free_nodes--;
                                                mark_inode_dirty(tree->inode);
                                                hfs_bnode_put(node);
-                                                if (!idx) {
-                                                        printk("unexpected idx %u (%u)\n", idx, node->this);
-                                                        BUG();
-                                                }
                                                return hfs_bnode_create(tree, idx);
                                        }
                                }
@@ -242,7 +243,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
                kunmap(*pagep);
                nidx = node->next;
                if (!nidx) {
-                        printk("create new bmap node...\n");
+                        printk(KERN_DEBUG "hfs: create new bmap node...\n");
                        next_node = hfs_bmap_new_bmap(node, idx);
                } else
                        next_node = hfs_bnode_find(tree, nidx);
@@ -284,7 +285,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
                hfs_bnode_put(node);
                if (!i) {
                        /* panic */;
-                        printk("HFS: unable to free bnode %u. bmap not found!\n", node->this);
+                        printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this);
                        return;
                }
                node = hfs_bnode_find(tree, i);
@@ -292,7 +293,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
                        return;
                if (node->type != HFS_NODE_MAP) {
                        /* panic */;
-                        printk("HFS: invalid bmap found! (%u,%d)\n", node->this, node->type);
+                        printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type);
                        hfs_bnode_put(node);
                        return;
                }
@@ -305,7 +306,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
        m = 1 << (~nidx & 7);
        byte = data[off];
        if (!(byte & m)) {
-                printk("HFS: trying to free free bnode %u(%d)\n", node->this, node->type);
+                printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type);
                kunmap(page);
                hfs_bnode_put(node);
                return;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 94712790c8b3..f2d7c49ce759 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -13,7 +13,8 @@
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
-int hfsplus_cat_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2)
+int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
+                             const hfsplus_btree_key *k2)
 {
        __be32 k1p, k2p;
@@ -22,7 +23,20 @@ int hfsplus_cat_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2)
        if (k1p != k2p)
                return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
-        return hfsplus_unistrcmp(&k1->cat.name, &k2->cat.name);
+        return hfsplus_strcasecmp(&k1->cat.name, &k2->cat.name);
+}
+int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
+                            const hfsplus_btree_key *k2)
+{
+        __be32 k1p, k2p;
+        k1p = k1->cat.parent;
+        k2p = k2->cat.parent;
+        if (k1p != k2p)
+                return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
+        return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
 }
 void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
@@ -80,8 +94,11 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
                memset(folder, 0, sizeof(*folder));
                folder->type = cpu_to_be16(HFSPLUS_FOLDER);
                folder->id = cpu_to_be32(inode->i_ino);
-                folder->create_date = folder->content_mod_date =
+                HFSPLUS_I(inode).create_date =
-                        folder->attribute_mod_date = folder->access_date = hfsp_now2mt();
+                        folder->create_date =
+                        folder->content_mod_date =
+                        folder->attribute_mod_date =
+                        folder->access_date = hfsp_now2mt();
                hfsplus_set_perms(inode, &folder->permissions);
                if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir)
                        /* invisible and namelocked */
@@ -95,18 +112,27 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
                file->type = cpu_to_be16(HFSPLUS_FILE);
                file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS);
                file->id = cpu_to_be32(cnid);
-                file->create_date = file->content_mod_date =
+                HFSPLUS_I(inode).create_date =
-                        file->attribute_mod_date = file->access_date = hfsp_now2mt();
+                        file->create_date =
+                        file->content_mod_date =
+                        file->attribute_mod_date =
+                        file->access_date = hfsp_now2mt();
                if (cnid == inode->i_ino) {
                        hfsplus_set_perms(inode, &file->permissions);
-                        file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type);
+                        if (S_ISLNK(inode->i_mode)) {
-                        file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator);
+                                file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE);
+                                file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR);
+                        } else {
+                                file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type);
+                                file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator);
+                        }
                        if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
                                file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
                } else {
                        file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE);
                        file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR);
                        file->user_info.fdFlags = cpu_to_be16(0x100);
+                        file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date;
                        file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev);
                }
                return sizeof(*file);
@@ -139,7 +165,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
        type = be16_to_cpu(tmp.type);
        if (type != HFSPLUS_FOLDER_THREAD && type != HFSPLUS_FILE_THREAD) {
-                printk("HFS+-fs: Found bad thread record in catalog\n");
+                printk(KERN_ERR "hfs: found bad thread record in catalog\n");
                return -EIO;
        }
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 50c8f44b6c66..01a6fe3a395c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -66,25 +66,32 @@ again:
                }
                cnid = be32_to_cpu(entry.file.id);
                if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) &&
-                    entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR)) {
+                    entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
+                    HFSPLUS_SB(sb).hidden_dir && /* must be tested before the HFSPLUS_I() dereference below */
+                    (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date ||
+                     entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date)) {
                        struct qstr str;
                        char name[32];
                        if (dentry->d_fsdata) {
-                                err = -ENOENT;
+                                /*
-                                inode = NULL;
+                                 * We found a link pointing to another link,
-                                goto out;
+                                 * so ignore it and treat it as regular file.
+                                 */
+                                cnid = (unsigned long)dentry->d_fsdata;
+                                linkid = 0;
+                        } else {
+                                dentry->d_fsdata = (void *)(unsigned long)cnid;
+                                linkid = be32_to_cpu(entry.file.permissions.dev);
+                                str.len = sprintf(name, "iNode%d", linkid);
+                                str.name = name;
+                                hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str);
+                                goto again;
                        }
-                        dentry->d_fsdata = (void *)(unsigned long)cnid;
-                        linkid = be32_to_cpu(entry.file.permissions.dev);
-                        str.len = sprintf(name, "iNode%d", linkid);
-                        str.name = name;
-                        hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str);
-                        goto again;
                } else if (!dentry->d_fsdata)
                        dentry->d_fsdata = (void *)(unsigned long)cnid;
        } else {
-                printk("HFS+-fs: Illegal catalog entry type in lookup\n");
+                printk(KERN_ERR "hfs: invalid catalog entry type in lookup\n");
                err = -EIO;
                goto fail;
        }
@@ -132,12 +139,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
        case 1:
                hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
                if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
-                        printk("HFS+-fs: bad catalog folder thread\n");
+                        printk(KERN_ERR "hfs: bad catalog folder thread\n");
                        err = -EIO;
                        goto out;
                }
                if (fd.entrylength < HFSPLUS_MIN_THREAD_SZ) {
-                        printk("HFS+-fs: truncated catalog thread\n");
+                        printk(KERN_ERR "hfs: truncated catalog thread\n");
                        err = -EIO;
                        goto out;
                }
@@ -156,7 +163,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
        for (;;) {
                if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) {
-                        printk("HFS+-fs: walked past end of dir\n");
+                        printk(KERN_ERR "hfs: walked past end of dir\n");
                        err = -EIO;
                        goto out;
                }
@@ -168,7 +175,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
                        goto out;
                if (type == HFSPLUS_FOLDER) {
                        if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) {
-                                printk("HFS+-fs: small dir entry\n");
+                                printk(KERN_ERR "hfs: small dir entry\n");
                                err = -EIO;
                                goto out;
                        }
@@ -180,7 +187,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                break;
                } else if (type == HFSPLUS_FILE) {
                        if (fd.entrylength < sizeof(struct hfsplus_cat_file)) {
-                                printk("HFS+-fs: small file entry\n");
+                                printk(KERN_ERR "hfs: small file entry\n");
                                err = -EIO;
                                goto out;
                        }
@@ -188,7 +195,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                    be32_to_cpu(entry.file.id), DT_REG))
                                break;
                } else {
-                        printk("HFS+-fs: bad catalog entry type\n");
+                        printk(KERN_ERR "hfs: bad catalog entry type\n");
                        err = -EIO;
                        goto out;
                }
@@ -330,7 +337,8 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
        if (res)
                return res;
-        inode->i_nlink--;
+        if (inode->i_nlink > 0)
+                inode->i_nlink--;
        hfsplus_delete_inode(inode);
        if (inode->i_ino != cnid && !inode->i_nlink) {
                if (!atomic_read(&HFSPLUS_I(inode).opencnt)) {
@@ -339,7 +347,8 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
                                hfsplus_delete_inode(inode);
                } else
                        inode->i_flags |= S_DEAD;
-        }
+        } else
+                inode->i_nlink = 0;
        inode->i_ctime = CURRENT_TIME_SEC;
        mark_inode_dirty(inode);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index e3ff56a03011..1a7480089e82 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -16,7 +16,8 @@
 #include "hfsplus_raw.h"
 /* Compare two extents keys, returns 0 on same, pos/neg for difference */
-int hfsplus_ext_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2)
+int hfsplus_ext_cmp_key(const hfsplus_btree_key *k1,
+                        const hfsplus_btree_key *k2)
 {
        __be32 k1id, k2id;
        __be32 k1s, k2s;
@@ -349,10 +350,9 @@ int hfsplus_file_extend(struct inode *inode)
        if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) {
                // extend alloc file
-                printk("extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8,
+                printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8,
                        HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks);
                return -ENOSPC;
-                //BUG();
        }
        down(&HFSPLUS_I(inode).extents_lock);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index df16fcbff3fb..7ae393637a0c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -36,7 +36,7 @@
 #define HFSPLUS_TYPE_DATA 0x00
 #define HFSPLUS_TYPE_RSRC 0xFF
-typedef int (*btree_keycmp)(hfsplus_btree_key *, hfsplus_btree_key *);
+typedef int (*btree_keycmp)(const hfsplus_btree_key *, const hfsplus_btree_key *);
 #define NODE_HASH_SIZE  256
@@ -143,15 +143,13 @@ struct hfsplus_sb_info {
        unsigned long flags;
-        atomic_t inode_cnt;
-        u32 last_inode_cnt;
        struct hlist_head rsrc_inodes;
 };
 #define HFSPLUS_SB_WRITEBACKUP  0x0001
 #define HFSPLUS_SB_NODECOMPOSE  0x0002
 #define HFSPLUS_SB_FORCE        0x0004
+#define HFSPLUS_SB_HFSX         0x0008
 struct hfsplus_inode_info {
@@ -168,6 +166,7 @@ struct hfsplus_inode_info {
        struct inode *rsrc_inode;
        unsigned long flags;
+        __be32 create_date;
        /* Device number in hfsplus_permissions in catalog */
        u32 dev;
        /* BSD system and user file flags */
@@ -306,7 +305,8 @@ int hfs_brec_read(struct hfs_find_data *, void *, int);
 int hfs_brec_goto(struct hfs_find_data *, int);
 /* catalog.c */
-int hfsplus_cat_cmp_key(hfsplus_btree_key *, hfsplus_btree_key *);
+int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
+int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
 void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *);
 int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *);
 int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
@@ -315,7 +315,7 @@ int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
                       struct inode *, struct qstr *);
 /* extents.c */
-int hfsplus_ext_cmp_key(hfsplus_btree_key *, hfsplus_btree_key *);
+int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
 void hfsplus_ext_write_extent(struct inode *);
 int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
 int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int);
@@ -353,7 +353,8 @@ extern u16 hfsplus_decompose_table[];
 extern u16 hfsplus_compose_table[];
 /* unicode.c */
-int hfsplus_unistrcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
+int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
+int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
 int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
 int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index b4fbed633219..49205531a500 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -22,8 +22,10 @@
 #define HFSPLUS_SECTOR_SHIFT         9
 #define HFSPLUS_VOLHEAD_SECTOR       2
 #define HFSPLUS_VOLHEAD_SIG     0x482b
+#define HFSPLUS_VOLHEAD_SIGX    0x4858
 #define HFSPLUS_SUPER_MAGIC     0x482b
-#define HFSPLUS_CURRENT_VERSION      4
+#define HFSPLUS_MIN_VERSION          4
+#define HFSPLUS_CURRENT_VERSION      5
 #define HFSP_WRAP_MAGIC         0x4244
 #define HFSP_WRAP_ATTRIB_SLOCK  0x8000
@@ -41,6 +43,9 @@
 #define HFSP_HARDLINK_TYPE      0x686c6e6b      /* 'hlnk' */
 #define HFSP_HFSPLUS_CREATOR    0x6866732b      /* 'hfs+' */
+#define HFSP_SYMLINK_TYPE       0x736c6e6b      /* 'slnk' */
+#define HFSP_SYMLINK_CREATOR    0x72686170      /* 'rhap' */
 #define HFSP_MOUNT_VERSION      0x482b4c78      /* 'H+Lx' */
 /* Structures used on disk */
@@ -161,7 +166,7 @@ struct hfs_btree_header_rec {
        u16 reserved1;
        __be32 clump_size;
        u8 btree_type;
-        u8 reserved2;
+        u8 key_type;
        __be32 attributes;
        u32 reserved3[16];
 } __packed;
@@ -186,6 +191,10 @@ struct hfs_btree_header_rec {
 #define HFSPLUS_EXCH_CNID               15      /* ExchangeFiles temp id */
 #define HFSPLUS_FIRSTUSER_CNID          16      /* first available user id */
+/* btree key type */
+#define HFSPLUS_KEY_CASEFOLDING         0xCF    /* case-insensitive */
+#define HFSPLUS_KEY_BINARY              0xBC    /* case-sensitive */
 /* HFS+ catalog entry key */
 struct hfsplus_cat_key {
        __be16 key_len;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index fc98583cf045..12ed2b7d046b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -18,13 +18,11 @@
 static int hfsplus_readpage(struct file *file, struct page *page)
 {
-        //printk("readpage: %lu\n", page->index);
        return block_read_full_page(page, hfsplus_get_block);
 }
 static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)
 {
-        //printk("writepage: %lu\n", page->index);
        return block_write_full_page(page, hfsplus_get_block, wbc);
 }
@@ -92,7 +90,6 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
                } while (--i && nidx < tree->node_count);
                spin_unlock(&tree->hash_lock);
        }
-        //printk("releasepage: %lu,%x = %d\n", page->index, mask, res);
        return res ? try_to_free_buffers(page) : 0;
 }
@@ -182,11 +179,6 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
        igrab(dir);
        hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
        mark_inode_dirty(inode);
-        {
-        void hfsplus_inode_check(struct super_block *sb);
-        atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
-        hfsplus_inode_check(sb);
-        }
 out:
        d_add(dentry, inode);
        return NULL;
@@ -276,13 +268,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        if (atomic_read(&file->f_count) != 0)
                return 0;
        if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
                hfsplus_file_truncate(inode);
                if (inode->i_flags & S_DEAD) {
                        hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
                        hfsplus_delete_inode(inode);
                }
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
        }
        return 0;
 }
@@ -317,11 +309,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
        if (!inode)
                return NULL;
-        {
-        void hfsplus_inode_check(struct super_block *sb);
-        atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
-        hfsplus_inode_check(sb);
-        }
        inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
        inode->i_mode = mode;
        inode->i_uid = current->fsuid;
@@ -444,7 +431,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                inode->i_size = 2 + be32_to_cpu(folder->valence);
                inode->i_atime = hfsp_mt2ut(folder->access_date);
                inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
-                inode->i_ctime = inode->i_mtime;
+                inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
+                HFSPLUS_I(inode).create_date = folder->create_date;
                HFSPLUS_I(inode).fs_blocks = 0;
                inode->i_op = &hfsplus_dir_inode_operations;
                inode->i_fop = &hfsplus_dir_operations;
@@ -475,9 +463,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                }
                inode->i_atime = hfsp_mt2ut(file->access_date);
                inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
-                inode->i_ctime = inode->i_mtime;
+                inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
+                HFSPLUS_I(inode).create_date = file->create_date;
        } else {
-                printk("HFS+-fs: bad catalog entry used to create inode\n");
+                printk(KERN_ERR "hfs: bad catalog entry used to create inode\n");
                res = -EIO;
        }
        return res;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index e07aa096e07c..13cf848ac833 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -12,6 +12,7 @@
 * hfsplus ioctls
 */
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/xattr.h>
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 935dafba0078..dc64fac00831 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -83,58 +83,58 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
                switch (token) {
                case opt_creator:
                        if (match_fourchar(&args[0], &sbi->creator)) {
-                                printk("HFS+-fs: creator requires a 4 character value\n");
+                                printk(KERN_ERR "hfs: creator requires a 4 character value\n");
                                return 0;
                        }
                        break;
                case opt_type:
                        if (match_fourchar(&args[0], &sbi->type)) {
-                                printk("HFS+-fs: type requires a 4 character value\n");
+                                printk(KERN_ERR "hfs: type requires a 4 character value\n");
                                return 0;
                        }
                        break;
                case opt_umask:
                        if (match_octal(&args[0], &tmp)) {
-                                printk("HFS+-fs: umask requires a value\n");
+                                printk(KERN_ERR "hfs: umask requires a value\n");
                                return 0;
                        }
                        sbi->umask = (umode_t)tmp;
                        break;
                case opt_uid:
                        if (match_int(&args[0], &tmp)) {
-                                printk("HFS+-fs: uid requires an argument\n");
+                                printk(KERN_ERR "hfs: uid requires an argument\n");
                                return 0;
                        }
                        sbi->uid = (uid_t)tmp;
                        break;
                case opt_gid:
                        if (match_int(&args[0], &tmp)) {
-                                printk("HFS+-fs: gid requires an argument\n");
+                                printk(KERN_ERR "hfs: gid requires an argument\n");
                                return 0;
                        }
                        sbi->gid = (gid_t)tmp;
                        break;
                case opt_part:
                        if (match_int(&args[0], &sbi->part)) {
-                                printk("HFS+-fs: part requires an argument\n");
+                                printk(KERN_ERR "hfs: part requires an argument\n");
                                return 0;
                        }
                        break;
                case opt_session:
                        if (match_int(&args[0], &sbi->session)) {
-                                printk("HFS+-fs: session requires an argument\n");
+                                printk(KERN_ERR "hfs: session requires an argument\n");
                                return 0;
                        }
                        break;
                case opt_nls:
                        if (sbi->nls) {
-                                printk("HFS+-fs: unable to change nls mapping\n");
+                                printk(KERN_ERR "hfs: unable to change nls mapping\n");
                                return 0;
                        }
                        p = match_strdup(&args[0]);
                        sbi->nls = load_nls(p);
                        if (!sbi->nls) {
-                                printk("HFS+-fs: unable to load nls mapping \"%s\"\n", p);
+                                printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
                                kfree(p);
                                return 0;
                        }
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 6daaf7c755a6..7843f792a4b7 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -22,29 +22,12 @@ static void hfsplus_destroy_inode(struct inode *inode);
 #include "hfsplus_fs.h"
-void hfsplus_inode_check(struct super_block *sb)
-{
-#if 0
-        u32 cnt = atomic_read(&HFSPLUS_SB(sb).inode_cnt);
-        u32 last_cnt = HFSPLUS_SB(sb).last_inode_cnt;
-        if (cnt <= (last_cnt / 2) ||
-            cnt >= (last_cnt * 2)) {
-                HFSPLUS_SB(sb).last_inode_cnt = cnt;
-                printk("inode_check: %u,%u,%u\n", cnt, last_cnt,
-                        HFSPLUS_SB(sb).cat_tree ? HFSPLUS_SB(sb).cat_tree->node_hash_cnt : 0);
-        }
-#endif
-}
 static void hfsplus_read_inode(struct inode *inode)
 {
        struct hfs_find_data fd;
        struct hfsplus_vh *vhdr;
        int err;
-        atomic_inc(&HFSPLUS_SB(inode->i_sb).inode_cnt);
-        hfsplus_inode_check(inode->i_sb);
        INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
        init_MUTEX(&HFSPLUS_I(inode).extents_lock);
        HFSPLUS_I(inode).flags = 0;
@@ -155,12 +138,10 @@ static int hfsplus_write_inode(struct inode *inode, int unused)
 static void hfsplus_clear_inode(struct inode *inode)
 {
        dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino);
-        atomic_dec(&HFSPLUS_SB(inode->i_sb).inode_cnt);
        if (HFSPLUS_IS_RSRC(inode)) {
                HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
                iput(HFSPLUS_I(inode).rsrc_inode);
        }
-        hfsplus_inode_check(inode->i_sb);
 }
 static void hfsplus_write_super(struct super_block *sb)
@@ -188,7 +169,7 @@ static void hfsplus_write_super(struct super_block *sb)
                        block = HFSPLUS_SB(sb).blockoffset;
                        block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9);
                        offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1);
-                        printk("backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset,
+                        printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset,
                                HFSPLUS_SB(sb).sect_count, block, offset);
                        bh = sb_bread(sb, block);
                        if (bh) {
@@ -198,7 +179,7 @@ static void hfsplus_write_super(struct super_block *sb)
                                        mark_buffer_dirty(bh);
                                        brelse(bh);
                                } else
-                                        printk("backup not found!\n");
+                                        printk(KERN_WARNING "hfs: backup not found!\n");
                        }
                }
                HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
@@ -259,18 +240,18 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
                        return -EINVAL;
                if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
-                        printk("HFS+-fs warning: Filesystem was not cleanly unmounted, "
+                        printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
                               "running fsck.hfsplus is recommended.  leaving read-only.\n");
                        sb->s_flags |= MS_RDONLY;
                        *flags |= MS_RDONLY;
                } else if (sbi.flags & HFSPLUS_SB_FORCE) {
                        /* nothing */
                } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
-                        printk("HFS+-fs: Filesystem is marked locked, leaving read-only.\n");
+                        printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
                        sb->s_flags |= MS_RDONLY;
                        *flags |= MS_RDONLY;
                } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
-                        printk("HFS+-fs: Filesystem is marked journaled, leaving read-only.\n");
+                        printk(KERN_WARNING "hfs: filesystem is marked journaled, leaving read-only.\n");
                        sb->s_flags |= MS_RDONLY;
                        *flags |= MS_RDONLY;
                }
@@ -311,8 +292,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        INIT_HLIST_HEAD(&sbi->rsrc_inodes);
        hfsplus_fill_defaults(sbi);
        if (!hfsplus_parse_options(data, sbi)) {
-                if (!silent)
+                printk(KERN_ERR "hfs: unable to parse mount options\n");
-                        printk("HFS+-fs: unable to parse mount options\n");
                err = -EINVAL;
                goto cleanup;
        }
@@ -321,7 +301,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        nls = sbi->nls;
        sbi->nls = load_nls("utf8");
        if (!sbi->nls) {
-                printk("HFS+: unable to load nls for utf8\n");
+                printk(KERN_ERR "hfs: unable to load nls for utf8\n");
                err = -EINVAL;
                goto cleanup;
        }
@@ -329,17 +309,17 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        /* Grab the volume header */
        if (hfsplus_read_wrapper(sb)) {
                if (!silent)
-                        printk("HFS+-fs: unable to find HFS+ superblock\n");
+                        printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
                err = -EINVAL;
                goto cleanup;
        }
        vhdr = HFSPLUS_SB(sb).s_vhdr;
        /* Copy parts of the volume header into the superblock */
-        sb->s_magic = be16_to_cpu(vhdr->signature);
+        sb->s_magic = HFSPLUS_VOLHEAD_SIG;
-        if (be16_to_cpu(vhdr->version) != HFSPLUS_CURRENT_VERSION) {
+        if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
-                if (!silent)
+            be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
-                        printk("HFS+-fs: wrong filesystem version\n");
+                printk(KERN_ERR "hfs: wrong filesystem version\n");
                goto cleanup;
        }
        HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks);
@@ -360,20 +340,17 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
-                if (!silent)
+                printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, "
-                        printk("HFS+-fs warning: Filesystem was not cleanly unmounted, "
+                       "running fsck.hfsplus is recommended.  mounting read-only.\n");
-                               "running fsck.hfsplus is recommended.  mounting read-only.\n");
                sb->s_flags |= MS_RDONLY;
        } else if (sbi->flags & HFSPLUS_SB_FORCE) {
                /* nothing */
        } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
-                if (!silent)
+                printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
-                        printk("HFS+-fs: Filesystem is marked locked, mounting read-only.\n");
                sb->s_flags |= MS_RDONLY;
        } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
-                if (!silent)
+                printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, "
-                        printk("HFS+-fs: write access to a jounaled filesystem is not supported, "
+                       "use the force option at your own risk, mounting read-only.\n");
-                               "use the force option at your own risk, mounting read-only.\n");
                sb->s_flags |= MS_RDONLY;
        }
        sbi->flags &= ~HFSPLUS_SB_FORCE;
@@ -381,21 +358,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        /* Load metadata objects (B*Trees) */
        HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
        if (!HFSPLUS_SB(sb).ext_tree) {
-                if (!silent)
+                printk(KERN_ERR "hfs: failed to load extents file\n");
-                        printk("HFS+-fs: failed to load extents file\n");
                goto cleanup;
        }
        HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
        if (!HFSPLUS_SB(sb).cat_tree) {
-                if (!silent)
+                printk(KERN_ERR "hfs: failed to load catalog file\n");
-                        printk("HFS+-fs: failed to load catalog file\n");
                goto cleanup;
        }
        HFSPLUS_SB(sb).alloc_file = iget(sb, HFSPLUS_ALLOC_CNID);
        if (!HFSPLUS_SB(sb).alloc_file) {
-                if (!silent)
+                printk(KERN_ERR "hfs: failed to load allocation file\n");
-                        printk("HFS+-fs: failed to load allocation file\n");
                goto cleanup;
        }
@@ -403,8 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        root = iget(sb, HFSPLUS_ROOT_CNID);
        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
-                if (!silent)
+                printk(KERN_ERR "hfs: failed to load root directory\n");
-                        printk("HFS+-fs: failed to load root directory\n");
                iput(root);
                goto cleanup;
        }
@@ -438,7 +411,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
        if (!HFSPLUS_SB(sb).hidden_dir) {
-                printk("HFS+: create hidden dir...\n");
+                printk(KERN_DEBUG "hfs: create hidden dir...\n");
                HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
                hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode,
                                   &str, HFSPLUS_SB(sb).hidden_dir);
@@ -518,7 +491,7 @@ static void __exit exit_hfsplus_fs(void)
 {
        unregister_filesystem(&hfsplus_fs_type);
        if (kmem_cache_destroy(hfsplus_inode_cachep))
-                printk(KERN_INFO "hfsplus_inode_cache: not all structures were freed\n");
+                printk(KERN_ERR "hfsplus_inode_cache: not all structures were freed\n");
 }
 module_init(init_hfsplus_fs)
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 060c69048c3d..689c8bd721fb 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -28,7 +28,8 @@ static inline u16 case_fold(u16 c)
 }
 /* Compare unicode strings, return values like normal strcmp */
-int hfsplus_unistrcmp(const struct hfsplus_unistr *s1, const struct hfsplus_unistr *s2)
+int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
+                       const struct hfsplus_unistr *s2)
 {
        u16 len1, len2, c1, c2;
        const hfsplus_unichr *p1, *p2;
@@ -59,6 +60,33 @@ int hfsplus_unistrcmp(const struct hfsplus_unistr *s1, const struct hfsplus_unis
        }
 }
+/*
+ * Compare names as a sequence of 16-bit unsigned integers.
+ *
+ * The length fields and every code unit are converted with be16_to_cpu()
+ * before being compared; no case folding is applied in this function.
+ *
+ * Returns -1, 0 or 1.  The first differing code unit decides the result;
+ * if one name is a prefix of the other, the shorter name sorts first.
+ */
+int hfsplus_strcmp(const struct hfsplus_unistr *s1,
+                   const struct hfsplus_unistr *s2)
+{
+        u16 len1, len2, c1, c2;
+        const hfsplus_unichr *p1, *p2;
+        int len;
+        len1 = be16_to_cpu(s1->length);
+        len2 = be16_to_cpu(s2->length);
+        p1 = s1->unicode;
+        p2 = s2->unicode;
+        for (len = min(len1, len2); len > 0; len--) { /* walk the common prefix only */
+                c1 = be16_to_cpu(*p1);
+                c2 = be16_to_cpu(*p2);
+                if (c1 != c2)
+                        return c1 < c2 ? -1 : 1; /* unsigned comparison of the code units */
+                p1++;
+                p2++;
+        }
+        return len1 < len2 ? -1 : /* common prefix is equal: the lengths break the tie */
+               len1 > len2 ? 1 : 0;
+}
 #define Hangul_SBase    0xac00
 #define Hangul_LBase    0x1100
 #define Hangul_VBase    0x1161
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 95455e839231..72cab78f0509 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -28,8 +28,11 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
 {
        u32 extent;
        u16 attrib;
+        __be16 sig;
-        if (be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG)) != HFSPLUS_VOLHEAD_SIG)
+        sig = *(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG);
+        if (sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIG) &&
+            sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
                return 0;
        attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB));
@@ -70,7 +73,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
                        *start = (sector_t)te.cdte_addr.lba << 2;
                        return 0;
                }
-                printk(KERN_ERR "HFS: Invalid session number or type of track\n");
+                printk(KERN_ERR "hfs: invalid session number or type of track\n");
                return -EINVAL;
        }
        ms_info.addr_format = CDROM_LBA;
@@ -114,6 +117,10 @@ int hfsplus_read_wrapper(struct super_block *sb)
                }
                if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
                        break;
+                if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) {
+                        HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX;
+                        break;
+                }
                brelse(bh);
                /* check for a partition block
@@ -143,7 +150,7 @@ int hfsplus_read_wrapper(struct super_block *sb)
                blocksize >>= 1;
        if (sb_set_blocksize(sb, blocksize) != blocksize) {
-                printk("HFS+: unable to blocksize to %u!\n", blocksize);
+                printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", blocksize);
                return -EINVAL;
        }
@@ -158,7 +165,9 @@ int hfsplus_read_wrapper(struct super_block *sb)
                return -EIO;
        /* should still be the same... */
-        if (be16_to_cpu(vhdr->signature) != HFSPLUS_VOLHEAD_SIG)
+        if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ?
+                                cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) :
+                                cpu_to_be16(HFSPLUS_VOLHEAD_SIG)))
                goto error;
        HFSPLUS_SB(sb).s_vhbh = bh;
        HFSPLUS_SB(sb).s_vhdr = vhdr;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 0217c3a04441..5591f9623aa2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -32,19 +32,19 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        /*printk("dir lseek\n");*/
        if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
-        down(&i->i_sem);
+        mutex_lock(&i->i_mutex);
        pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
        while (pos != new_off) {
                if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
                else goto fail;
                if (pos == 12) goto fail;
        }
-        up(&i->i_sem);
+        mutex_unlock(&i->i_mutex);
 ok:
        unlock_kernel();
        return filp->f_pos = new_off;
 fail:
-        up(&i->i_sem);
+        mutex_unlock(&i->i_mutex);
        /*printk("illegal lseek: %016llx\n", new_off);*/
        unlock_kernel();
        return -ESPIPE;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 52930915bad8..a44dc5897399 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -171,12 +171,12 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
        err = -ENOMEM;
        parent = HPPFS_I(ino)->proc_dentry;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        proc_dentry = d_lookup(parent, &dentry->d_name);
        if(proc_dentry == NULL){
                proc_dentry = d_alloc(parent, &dentry->d_name);
                if(proc_dentry == NULL){
-                        up(&parent->d_inode->i_sem);
+                        mutex_unlock(&parent->d_inode->i_mutex);
                        goto out;
                }
                new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
@@ -186,7 +186,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
                        proc_dentry = new;
                }
        }
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        if(IS_ERR(proc_dentry))
                return(proc_dentry);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8c41315a6e42..f568102da1e8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -18,6 +18,7 @@
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/capability.h>
 #include <linux/backing-dev.h>
 #include <linux/hugetlb.h>
 #include <linux/pagevec.h>
@@ -118,7 +119,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        file_accessed(file);
        vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
        vma->vm_ops = &hugetlb_vm_ops;
@@ -133,7 +134,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (inode->i_size < len)
                inode->i_size = len;
 out:
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return ret;
 }
@@ -401,7 +402,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
                inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                info = HUGETLBFS_I(inode);
-                mpol_shared_policy_init(&info->policy);
+                mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
                switch (mode & S_IFMT) {
                default:
                        init_special_inode(inode, mode, dev);
diff --git a/fs/inode.c b/fs/inode.c
index d8d04bd72b59..108138d4e909 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/mount.h>
 /*
 * This is needed for the following functions:
@@ -192,7 +193,7 @@ void inode_init_once(struct inode *inode)
        INIT_HLIST_NODE(&inode->i_hash);
        INIT_LIST_HEAD(&inode->i_dentry);
        INIT_LIST_HEAD(&inode->i_devices);
-        sema_init(&inode->i_sem, 1);
+        mutex_init(&inode->i_mutex);
        init_rwsem(&inode->i_alloc_sem);
        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
        rwlock_init(&inode->i_data.tree_lock);
@@ -770,7 +771,7 @@ EXPORT_SYMBOL(igrab);
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
-static inline struct inode *ifind(struct super_block *sb,
+static struct inode *ifind(struct super_block *sb,
                struct hlist_head *head, int (*test)(struct inode *, void *),
                void *data, const int wait)
 {
@@ -804,7 +805,7 @@ static inline struct inode *ifind(struct super_block *sb,
 *
 * Otherwise NULL is returned.
 */
-static inline struct inode *ifind_fast(struct super_block *sb,
+static struct inode *ifind_fast(struct super_block *sb,
                struct hlist_head *head, unsigned long ino)
 {
        struct inode *inode;
@@ -1176,22 +1177,33 @@ sector_t bmap(struct inode * inode, sector_t block)
 EXPORT_SYMBOL(bmap);
 /**
- *      update_atime    -       update the access time
+ *      touch_atime     -       update the access time
+ *      @mnt: mount the inode is accessed on
- *      @inode: inode accessed
+ *      @dentry: dentry whose inode is accessed
 *
 *      Update the accessed time on an inode and mark it for writeback.
 *      This function automatically handles read only file systems and media,
 *      as well as the "noatime" flag and inode specific "noatime" markers.
 */
-void update_atime(struct inode *inode)
+void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 {
+        struct inode *inode = dentry->d_inode;
        struct timespec now;
-        if (IS_NOATIME(inode))
+        if (IS_RDONLY(inode))
                return;
-        if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode))
+        if ((inode->i_flags & S_NOATIME) ||
+            (inode->i_sb->s_flags & MS_NOATIME) ||
+            ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
                return;
-        if (IS_RDONLY(inode))
+        /*
+         * We may have a NULL vfsmount when coming from NFSD
+         */
+        if (mnt &&
+            ((mnt->mnt_flags & MNT_NOATIME) ||
+             ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))))
                return;
        now = current_fs_time(inode->i_sb);
@@ -1201,19 +1213,23 @@ void update_atime(struct inode *inode)
        }
 }
-EXPORT_SYMBOL(update_atime);
+EXPORT_SYMBOL(touch_atime);
 /**
- *      inode_update_time       -       update mtime and ctime time
+ *      file_update_time        -       update mtime and ctime time
- *      @inode: inode accessed
+ *      @file: file accessed
- *      @ctime_too: update ctime too
 *
- *      Update the mtime time on an inode and mark it for writeback.
+ *      Update the mtime and ctime members of an inode and mark the inode
- *      When ctime_too is specified update the ctime too.
+ *      for writeback.  Note that this function is meant exclusively for
+ *      usage in the file write path of filesystems, and filesystems may
+ *      choose to explicitly ignore update via this function with the
+ *      S_NOCTIME inode flag, e.g. for network filesystem where these
+ *      timestamps are handled by the server.
 */
-void inode_update_time(struct inode *inode, int ctime_too)
+void file_update_time(struct file *file)
 {
+        struct inode *inode = file->f_dentry->d_inode;
        struct timespec now;
        int sync_it = 0;
@@ -1227,16 +1243,15 @@ void inode_update_time(struct inode *inode, int ctime_too)
                sync_it = 1;
        inode->i_mtime = now;
-        if (ctime_too) {
+        if (!timespec_equal(&inode->i_ctime, &now))
-                if (!timespec_equal(&inode->i_ctime, &now))
+                sync_it = 1;
-                        sync_it = 1;
+        inode->i_ctime = now;
-                inode->i_ctime = now;
-        }
        if (sync_it)
                mark_inode_dirty_sync(inode);
 }
-EXPORT_SYMBOL(inode_update_time);
+EXPORT_SYMBOL(file_update_time);
 int inode_needs_sync(struct inode *inode)
 {
diff --git a/fs/inotify.c b/fs/inotify.c
index 2fecb7af4a77..878ccca61213 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -33,6 +33,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/syscalls.h>
 #include <asm/ioctls.h>
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 569209181425..f8aeec3ca10c 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -8,6 +8,7 @@
 #include <linux/syscalls.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
+#include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/security.h>
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 4bf1c6365a19..ca77008146c0 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/ioprio.h>
 #include <linux/blkdev.h>
+#include <linux/capability.h>
 #include <linux/syscalls.h>
 static int set_task_ioprio(struct task_struct *task, int ioprio)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index e37e82b7cbf0..e7ba0c30e071 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -185,8 +185,5 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n
                }
        }
        unlock_kernel();
-        if (inode)
+        return d_splice_alias(inode, dentry);
-                return d_splice_alias(inode, dentry);
-        d_add(dentry, inode);
-        return NULL;
 }
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index cb3cef525c3b..e6265a0b56b8 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -338,7 +338,7 @@ restart:
         * done (maybe it's a new transaction, but it fell at the same
         * address).
         */
-        if (journal->j_checkpoint_transactions == transaction ||
+        if (journal->j_checkpoint_transactions == transaction &&
                        transaction->t_tid == this_tid) {
                int batch_count = 0;
                struct buffer_head *bhs[NR_BATCH];
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 002ad2bbc769..29e62d98bae6 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,7 +829,8 @@ restart_loop:
        journal->j_committing_transaction = NULL;
        spin_unlock(&journal->j_state_lock);
-        if (commit_transaction->t_checkpoint_list == NULL) {
+        if (commit_transaction->t_checkpoint_list == NULL &&
+            commit_transaction->t_checkpoint_io_list == NULL) {
                __journal_drop_transaction(journal, commit_transaction);
        } else {
                if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3dcc6d2162cb..fc3855a1aef3 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
        read_len = 0;
        result = 0;
-        offset = page->index << PAGE_CACHE_SHIFT;
+        offset = page_offset(page);
        kmap(page);
        buf = page_address(page);
@@ -1415,7 +1415,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
         * This will never trigger with sane page sizes.  leave it in
         * anyway, since I'm thinking about how to merge larger writes
         * (the current idea is to poke a thread that does the actual
-         * I/O and starts by doing a down(&inode->i_sem).  then we
+         * I/O and starts by doing a mutex_lock(&inode->i_mutex).  then we
         * would need to get the page cache pages and have a list of
         * I/O requests and do write-merging here.
         * -- prumpf
@@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page,
 {
       void *addr = page_address(page) + from;
       /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
-       loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from;
+       loff_t pos = page_offset(page) + from;
       return jffs_file_write(filp, addr, to-from, &pos);
 } /* jffs_commit_write() */
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index fff108bb118b..70f7a896c04a 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -47,7 +47,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
             ic = next_inode(&i, ic, (c)))
-static inline void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
+static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
                                        struct jffs2_inode_cache *ic)
 {
        struct jffs2_full_dirent *fd;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d0fcc5f3497e..09e5d10b8840 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -11,6 +11,7 @@
 *
 */
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index c79eebb8ab32..b635e167a3fa 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -134,7 +134,7 @@ static void jffs2_fragtree_insert(struct jffs2_node_frag *newfrag, struct jffs2_
 /*
 * Allocate and initializes a new fragment.
 */
-static inline struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size)
+static struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size)
 {
        struct jffs2_node_frag *newfrag;
@@ -513,7 +513,7 @@ free_out:
 *
 * Checks the node if we are in the checking stage.
 */
-static inline int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
+static int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
 {
        int ret;
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 68000a50ceb6..2967b7393415 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap)
        /*
         * write out dirty pages of bmap
         */
-        filemap_fdatawrite(ipbmap->i_mapping);
+        filemap_write_and_wait(ipbmap->i_mapping);
-        filemap_fdatawait(ipbmap->i_mapping);
        diWriteSpecial(ipbmap, 0);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 28201b194f53..31b4aa13dd4b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -265,8 +265,7 @@ int diSync(struct inode *ipimap)
        /*
         * write out dirty pages of imap
         */
-        filemap_fdatawrite(ipimap->i_mapping);
+        filemap_write_and_wait(ipimap->i_mapping);
-        filemap_fdatawait(ipimap->i_mapping);
        diWriteSpecial(ipimap, 0);
@@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip)
                jfs_err("diFreeSpecial called with NULL ip!");
                return;
        }
-        filemap_fdatawrite(ip->i_mapping);
+        filemap_write_and_wait(ip->i_mapping);
-        filemap_fdatawait(ip->i_mapping);
        truncate_inode_pages(ip->i_mapping, 0);
        iput(ip);
 }
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index c0fd7b3eadc6..dc21a5bd54d4 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -58,7 +58,7 @@ struct jfs_inode_info {
        /*
         * rdwrlock serializes xtree between reads & writes and synchronizes
         * changes to special inodes.  It's use would be redundant on
-         * directories since the i_sem taken in the VFS is sufficient.
+         * directories since the i_mutex taken in the VFS is sufficient.
         */
        struct rw_semaphore rdwrlock;
        /*
@@ -68,7 +68,7 @@ struct jfs_inode_info {
         * inode is blocked in txBegin or TxBeginAnon
         */
        struct semaphore commit_sem;
-        /* xattr_sem allows us to access the xattrs without taking i_sem */
+        /* xattr_sem allows us to access the xattrs without taking i_mutex */
        struct rw_semaphore xattr_sem;
        lid_t   xtlid;          /* lid of xtree lock on directory */
 #ifdef CONFIG_JFS_POSIX_ACL
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index b660c93c92de..2ddb6b892bcf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1231,10 +1231,8 @@ int txCommit(tid_t tid,		/* transaction identifier */
                 * when we don't need to worry about it at all.
                 *
                 * if ((!S_ISDIR(ip->i_mode))
-                 *    && (tblk->flag & COMMIT_DELETE) == 0) {
+                 *    && (tblk->flag & COMMIT_DELETE) == 0)
-                 *      filemap_fdatawrite(ip->i_mapping);
+                 *      filemap_write_and_wait(ip->i_mapping);
-                 *      filemap_fdatawait(ip->i_mapping);
-                 * }
                 */
                /*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index 5cf91785b541..21eaf7ac0fcb 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb)
         * Make sure all metadata makes it to disk before we mark
         * the superblock as clean
         */
-        filemap_fdatawrite(sbi->direct_inode->i_mapping);
+        filemap_write_and_wait(sbi->direct_inode->i_mapping);
-        filemap_fdatawait(sbi->direct_inode->i_mapping);
        /*
         * ensure all file system file pages are propagated to their
@@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb)
         * mark the superblock clean before everything is flushed to
         * disk.
         */
-        filemap_fdatawrite(sbi->direct_inode->i_mapping);
+        filemap_write_and_wait(sbi->direct_inode->i_mapping);
-        filemap_fdatawait(sbi->direct_inode->i_mapping);
        updateSuper(sb, FM_CLEAN);
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index c6dc254d3253..45180361871c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
         * by txCommit();
         */
        filemap_fdatawait(ipbmap->i_mapping);
-        filemap_fdatawrite(ipbmap->i_mapping);
+        filemap_write_and_wait(ipbmap->i_mapping);
-        filemap_fdatawait(ipbmap->i_mapping);
        diWriteSpecial(ipbmap, 0);
        newPage = nPages;       /* first new page number */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4226af3ea91b..8d31f1336431 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -502,8 +502,7 @@ out_no_rw:
                jfs_err("jfs_umount failed with return code %d", rc);
        }
 out_mount_failed:
-        filemap_fdatawrite(sbi->direct_inode->i_mapping);
+        filemap_write_and_wait(sbi->direct_inode->i_mapping);
-        filemap_fdatawait(sbi->direct_inode->i_mapping);
        truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
        make_bad_inode(sbi->direct_inode);
        iput(sbi->direct_inode);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 23aa5066b5a4..f23048f9471f 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -17,6 +17,7 @@
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
@@ -83,21 +84,6 @@ struct ea_buffer {
 #define EA_NEW          0x0004
 #define EA_MALLOC       0x0008
-/* Namespaces */
-#define XATTR_SYSTEM_PREFIX "system."
-#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)
-#define XATTR_USER_PREFIX "user."
-#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
-#define XATTR_OS2_PREFIX "os2."
-#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
-/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */
-#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1)
-#define XATTR_TRUSTED_PREFIX "trusted."
-#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1)
 /*
 * These three routines are used to recognize on-disk extended attributes
@@ -773,36 +759,23 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 static int can_set_xattr(struct inode *inode, const char *name,
                         const void *value, size_t value_len)
 {
-        if (IS_RDONLY(inode))
+        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
-                return -EROFS;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
-        if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
-                /*
-                 * "system.*"
-                 */
                return can_set_system_xattr(inode, name, value, value_len);
-        if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
+        /*
-                return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
+         * Don't allow setting an attribute in an unknown namespace.
+         */
-#ifdef CONFIG_JFS_SECURITY
+        if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
-        if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)
+            strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
-            == 0)
+            strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
-                return 0;       /* Leave it to the security module */
+            strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
-#endif
-                
-        if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) &&
-           (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0))
                return -EOPNOTSUPP;
        if (!S_ISREG(inode->i_mode) &&
            (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
                return -EPERM;
-        return permission(inode, MAY_WRITE, NULL);
+        return 0;
 }
 int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
@@ -972,22 +945,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        return rc;
 }
-static int can_get_xattr(struct inode *inode, const char *name)
-{
-#ifdef CONFIG_JFS_SECURITY
-        if(strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0)
-                return 0;
-#endif
-        if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
-                return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
-        if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
-                return 0;
-        return permission(inode, MAY_READ, NULL);
-}
 ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
                       size_t buf_size)
 {
@@ -998,12 +955,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
        ssize_t size;
        int namelen = strlen(name);
        char *os2name = NULL;
-        int rc;
        char *value;
-        if ((rc = can_get_xattr(inode, name)))
-                return rc;
        if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
                os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
                                  GFP_KERNEL);
diff --git a/fs/libfs.c b/fs/libfs.c
index 58101dff2c66..63c020e6589e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -74,7 +74,7 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 {
-        down(&file->f_dentry->d_inode->i_sem);
+        mutex_lock(&file->f_dentry->d_inode->i_mutex);
        switch (origin) {
                case 1:
                        offset += file->f_pos;
@@ -82,7 +82,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
                        if (offset >= 0)
                                break;
                default:
-                        up(&file->f_dentry->d_inode->i_sem);
+                        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -93,20 +93,20 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
                        loff_t n = file->f_pos - 2;
                        spin_lock(&dcache_lock);
-                        list_del(&cursor->d_child);
+                        list_del(&cursor->d_u.d_child);
                        p = file->f_dentry->d_subdirs.next;
                        while (n && p != &file->f_dentry->d_subdirs) {
                                struct dentry *next;
-                                next = list_entry(p, struct dentry, d_child);
+                                next = list_entry(p, struct dentry, d_u.d_child);
                                if (!d_unhashed(next) && next->d_inode)
                                        n--;
                                p = p->next;
                        }
-                        list_add_tail(&cursor->d_child, p);
+                        list_add_tail(&cursor->d_u.d_child, p);
                        spin_unlock(&dcache_lock);
                }
        }
-        up(&file->f_dentry->d_inode->i_sem);
+        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
        return offset;
 }
@@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
        struct dentry *dentry = filp->f_dentry;
        struct dentry *cursor = filp->private_data;
-        struct list_head *p, *q = &cursor->d_child;
+        struct list_head *p, *q = &cursor->d_u.d_child;
        ino_t ino;
        int i = filp->f_pos;
@@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        }
                        for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
                                struct dentry *next;
-                                next = list_entry(p, struct dentry, d_child);
+                                next = list_entry(p, struct dentry, d_u.d_child);
                                if (d_unhashed(next) || !next->d_inode)
                                        continue;
@@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry)
        int ret = 0;
        spin_lock(&dcache_lock);
-        list_for_each_entry(child, &dentry->d_subdirs, d_child)
+        list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
                if (simple_positive(child))
                        goto out;
        ret = 1;
@@ -356,7 +356,7 @@ int simple_commit_write(struct file *file, struct page *page,
        /*
         * No need to use i_size_read() here, the i_size
-         * cannot change under us because we hold the i_sem.
+         * cannot change under us because we hold the i_mutex.
         */
        if (pos > inode->i_size)
                i_size_write(inode, pos);
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index f01e9c0d2677..200fbda2c6d1 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -44,7 +44,7 @@ loff_t_to_s32(loff_t offset)
 /*
 * XDR functions for basic NLM types
 */
-static inline u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c)
+static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c)
 {
        unsigned int    len;
@@ -79,7 +79,7 @@ nlm_encode_cookie(u32 *p, struct nlm_cookie *c)
        return p;
 }
-static inline u32 *
+static u32 *
 nlm_decode_fh(u32 *p, struct nfs_fh *f)
 {
        unsigned int    len;
@@ -119,7 +119,7 @@ nlm_encode_oh(u32 *p, struct xdr_netobj *oh)
        return xdr_encode_netobj(p, oh);
 }
-static inline u32 *
+static u32 *
 nlm_decode_lock(u32 *p, struct nlm_lock *lock)
 {
        struct file_lock        *fl = &lock->fl;
diff --git a/fs/locks.c b/fs/locks.c
index fb32d6218e21..909eab8fb1d0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void)
 }
 /* Free a lock which is not in use. */
-static inline void locks_free_lock(struct file_lock *fl)
+static void locks_free_lock(struct file_lock *fl)
 {
        if (fl == NULL) {
                BUG();
@@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 /*
 * Check whether two locks have the same owner.
 */
-static inline int
+static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
-posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 {
        if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
                return fl2->fl_lmops == fl1->fl_lmops &&
@@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 /* Remove waiter from blocker's block list.
 * When blocker ends up pointing to itself then the list is empty.
 */
-static inline void __locks_delete_block(struct file_lock *waiter)
+static void __locks_delete_block(struct file_lock *waiter)
 {
        list_del_init(&waiter->fl_block);
        list_del_init(&waiter->fl_link);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 0f1e4530670f..f5bbe4c97c58 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -126,7 +126,7 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
 }
-static inline void
+static void
 __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 {
        int n;
@@ -139,7 +139,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 }
-static inline void
+static void
 __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
        struct mb_cache *cache = ce->e_cache;
@@ -158,7 +158,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 }
-static inline void
+static void
 __mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
 {
        /* Wake up all processes queuing for this cache entry. */
diff --git a/fs/mpage.c b/fs/mpage.c
index f1d2d02bd4c8..e431cb3878d6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
        if (page_has_buffers(page))
                goto confused;
-        block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+        block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
        last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
        bh.b_page = page;
@@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
         * The page has no buffers: map it to disk
         */
        BUG_ON(!PageUptodate(page));
-        block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+        block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
        last_block = (i_size - 1) >> blkbits;
        map_bh.b_page = page;
        for (page_block = 0; page_block < blocks_per_page; ) {
diff --git a/fs/namei.c b/fs/namei.c
index 6dbbd42d8b95..4acdac043b6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,7 +28,10 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/capability.h>
 #include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/namei.h>
 #include <asm/namei.h>
 #include <asm/uaccess.h>
@@ -112,7 +115,7 @@
 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 * PATH_MAX includes the nul terminator --RR.
 */
-static inline int do_getname(const char __user *filename, char *page)
+static int do_getname(const char __user *filename, char *page)
 {
        int retval;
        unsigned long len = PATH_MAX;
@@ -395,7 +398,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
 * short-cut DAC fails, then call permission() to do more
 * complete permission check.
 */
-static inline int exec_permission_lite(struct inode *inode,
+static int exec_permission_lite(struct inode *inode,
                                       struct nameidata *nd)
 {
        umode_t mode = inode->i_mode;
@@ -438,7 +441,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        struct dentry * result;
        struct inode *dir = parent->d_inode;
-        down(&dir->i_sem);
+        mutex_lock(&dir->i_mutex);
        /*
         * First re-do the cached lookup just in case it was created
         * while we waited for the directory semaphore..
@@ -464,7 +467,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
                        else
                                result = dentry;
                }
-                up(&dir->i_sem);
+                mutex_unlock(&dir->i_mutex);
                return result;
        }
@@ -472,7 +475,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
         * Uhhuh! Nasty case: the cache was re-populated while
         * we waited on the semaphore. Need to revalidate.
         */
-        up(&dir->i_sem);
+        mutex_unlock(&dir->i_mutex);
        if (result->d_op && result->d_op->d_revalidate) {
                if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
                        dput(result);
@@ -485,7 +488,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 static int __emul_lookup_dentry(const char *, struct nameidata *);
 /* SMP-safe */
-static inline int
+static __always_inline int
 walk_init_root(const char *name, struct nameidata *nd)
 {
        read_lock(&current->fs->lock);
@@ -503,7 +506,7 @@ walk_init_root(const char *name, struct nameidata *nd)
        return 1;
 }
-static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
        int res = 0;
        char *name;
@@ -543,7 +546,7 @@ struct path {
        struct dentry *dentry;
 };
-static inline int __do_follow_link(struct path *path, struct nameidata *nd)
+static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
 {
        int error;
        void *cookie;
@@ -689,7 +692,7 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
        return 0;
 }
-static inline void follow_dotdot(struct nameidata *nd)
+static __always_inline void follow_dotdot(struct nameidata *nd)
 {
        while(1) {
                struct vfsmount *parent;
@@ -1062,7 +1065,8 @@ set_it:
 }
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+static int fastcall do_path_lookup(int dfd, const char *name,
+                                unsigned int flags, struct nameidata *nd)
 {
        int retval = 0;
@@ -1082,9 +1086,38 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
                }
                nd->mnt = mntget(current->fs->rootmnt);
                nd->dentry = dget(current->fs->root);
-        } else {
+        } else if (dfd == AT_FDCWD) {
                nd->mnt = mntget(current->fs->pwdmnt);
                nd->dentry = dget(current->fs->pwd);
+        } else {
+                struct file *file;
+                int fput_needed;
+                struct dentry *dentry;
+                file = fget_light(dfd, &fput_needed);
+                if (!file) {
+                        retval = -EBADF;
+                        goto out_fail;
+                }
+                dentry = file->f_dentry;
+                if (!S_ISDIR(dentry->d_inode->i_mode)) {
+                        retval = -ENOTDIR;
+                        fput_light(file, fput_needed);
+                        goto out_fail;
+                }
+                retval = file_permission(file, MAY_EXEC);
+                if (retval) {
+                        fput_light(file, fput_needed);
+                        goto out_fail;
+                }
+                nd->mnt = mntget(file->f_vfsmnt);
+                nd->dentry = dget(dentry);
+                fput_light(file, fput_needed);
        }
        read_unlock(&current->fs->lock);
        current->total_link_count = 0;
@@ -1093,11 +1126,19 @@ out:
        if (unlikely(current->audit_context
                     && nd && nd->dentry && nd->dentry->d_inode))
                audit_inode(name, nd->dentry->d_inode, flags);
+out_fail:
        return retval;
 }
-static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags,
+int fastcall path_lookup(const char *name, unsigned int flags,
-                struct nameidata *nd, int open_flags, int create_mode)
+                        struct nameidata *nd)
+{
+        return do_path_lookup(AT_FDCWD, name, flags, nd);
+}
+static int __path_lookup_intent_open(int dfd, const char *name,
+                unsigned int lookup_flags, struct nameidata *nd,
+                int open_flags, int create_mode)
 {
        struct file *filp = get_empty_filp();
        int err;
@@ -1107,7 +1148,7 @@ static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags
        nd->intent.open.file = filp;
        nd->intent.open.flags = open_flags;
        nd->intent.open.create_mode = create_mode;
-        err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd);
+        err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
        if (IS_ERR(nd->intent.open.file)) {
                if (err == 0) {
                        err = PTR_ERR(nd->intent.open.file);
@@ -1125,10 +1166,10 @@ static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 */
-int path_lookup_open(const char *name, unsigned int lookup_flags,
+int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
                struct nameidata *nd, int open_flags)
 {
-        return __path_lookup_intent_open(name, lookup_flags, nd,
+        return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
                        open_flags, 0);
 }
@@ -1140,12 +1181,12 @@ int path_lookup_open(const char *name, unsigned int lookup_flags,
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 */
-static int path_lookup_create(const char *name, unsigned int lookup_flags,
+static int path_lookup_create(int dfd, const char *name,
-                              struct nameidata *nd, int open_flags,
+                              unsigned int lookup_flags, struct nameidata *nd,
-                              int create_mode)
+                              int open_flags, int create_mode)
 {
-        return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
+        return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
-                        open_flags, create_mode);
+                        nd, open_flags, create_mode);
 }
 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
@@ -1155,7 +1196,7 @@ int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
        int err = PTR_ERR(tmp);
        if (!IS_ERR(tmp)) {
-                err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0);
+                err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
                putname(tmp);
        }
        return err;
@@ -1247,18 +1288,24 @@ access:
 * that namei follows links, while lnamei does not.
 * SMP-safe
 */
-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
+                            struct nameidata *nd)
 {
        char *tmp = getname(name);
        int err = PTR_ERR(tmp);
        if (!IS_ERR(tmp)) {
-                err = path_lookup(tmp, flags, nd);
+                err = do_path_lookup(dfd, tmp, flags, nd);
                putname(tmp);
        }
        return err;
 }
+int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+{
+        return __user_walk_fd(AT_FDCWD, name, flags, nd);
+}
 /*
 * It's inline, so penalty for filesystems that don't use sticky bit is
 * minimal.
@@ -1293,7 +1340,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */
-static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 {
        int error;
@@ -1366,7 +1413,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        struct dentry *p;
        if (p1 == p2) {
-                down(&p1->d_inode->i_sem);
+                mutex_lock(&p1->d_inode->i_mutex);
                return NULL;
        }
@@ -1374,30 +1421,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        for (p = p1; p->d_parent != p; p = p->d_parent) {
                if (p->d_parent == p2) {
-                        down(&p2->d_inode->i_sem);
+                        mutex_lock(&p2->d_inode->i_mutex);
-                        down(&p1->d_inode->i_sem);
+                        mutex_lock(&p1->d_inode->i_mutex);
                        return p;
                }
        }
        for (p = p2; p->d_parent != p; p = p->d_parent) {
                if (p->d_parent == p1) {
-                        down(&p1->d_inode->i_sem);
+                        mutex_lock(&p1->d_inode->i_mutex);
-                        down(&p2->d_inode->i_sem);
+                        mutex_lock(&p2->d_inode->i_mutex);
                        return p;
                }
        }
-        down(&p1->d_inode->i_sem);
+        mutex_lock(&p1->d_inode->i_mutex);
-        down(&p2->d_inode->i_sem);
+        mutex_lock(&p2->d_inode->i_mutex);
        return NULL;
 }
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
-        up(&p1->d_inode->i_sem);
+        mutex_unlock(&p1->d_inode->i_mutex);
        if (p1 != p2) {
-                up(&p2->d_inode->i_sem);
+                mutex_unlock(&p2->d_inode->i_mutex);
                up(&p1->d_inode->i_sb->s_vfs_rename_sem);
        }
 }
@@ -1491,7 +1538,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                if (!error) {
                        DQUOT_INIT(inode);
                        
-                        error = do_truncate(dentry, 0, NULL);
+                        error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
                }
                put_write_access(inode);
                if (error)
@@ -1517,7 +1564,8 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 */
-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
+int open_namei(int dfd, const char *pathname, int flag,
+                int mode, struct nameidata *nd)
 {
        int acc_mode, error;
        struct path path;
@@ -1539,7 +1587,8 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
-                error = path_lookup_open(pathname, lookup_flags(flag), nd, flag);
+                error = path_lookup_open(dfd, pathname, lookup_flags(flag),
+                                         nd, flag);
                if (error)
                        return error;
                goto ok;
@@ -1548,7 +1597,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
        /*
         * Create - we need to know the parent.
         */
-        error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode);
+        error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
        if (error)
                return error;
@@ -1563,14 +1612,14 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
        dir = nd->dentry;
        nd->flags &= ~LOOKUP_PARENT;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        path.dentry = lookup_hash(nd);
        path.mnt = nd->mnt;
 do_last:
        error = PTR_ERR(path.dentry);
        if (IS_ERR(path.dentry)) {
-                up(&dir->d_inode->i_sem);
+                mutex_unlock(&dir->d_inode->i_mutex);
                goto exit;
        }
@@ -1579,7 +1628,7 @@ do_last:
                if (!IS_POSIXACL(dir->d_inode))
                        mode &= ~current->fs->umask;
                error = vfs_create(dir->d_inode, path.dentry, mode, nd);
-                up(&dir->d_inode->i_sem);
+                mutex_unlock(&dir->d_inode->i_mutex);
                dput(nd->dentry);
                nd->dentry = path.dentry;
                if (error)
@@ -1593,7 +1642,7 @@ do_last:
        /*
         * It already exists.
         */
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        error = -EEXIST;
        if (flag & O_EXCL)
@@ -1665,7 +1714,7 @@ do_link:
                goto exit;
        }
        dir = nd->dentry;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        path.dentry = lookup_hash(nd);
        path.mnt = nd->mnt;
        __putname(nd->last.name);
@@ -1680,13 +1729,13 @@ do_link:
 * Simple function to lookup and return a dentry and create it
 * if it doesn't exist.  Is SMP-safe.
 *
- * Returns with nd->dentry->d_inode->i_sem locked.
+ * Returns with nd->dentry->d_inode->i_mutex locked.
 */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
        struct dentry *dentry = ERR_PTR(-EEXIST);
-        down(&nd->dentry->d_inode->i_sem);
+        mutex_lock(&nd->dentry->d_inode->i_mutex);
        /*
         * Yucky last component or no last component at all?
         * (foo/., foo/.., /////)
@@ -1743,7 +1792,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
        return error;
 }
-asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
+asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
+                                unsigned dev)
 {
        int error = 0;
        char * tmp;
@@ -1756,7 +1806,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
        if (IS_ERR(tmp))
                return PTR_ERR(tmp);
-        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+        error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
        dentry = lookup_create(&nd, 0);
@@ -1784,7 +1834,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
                }
                dput(dentry);
        }
-        up(&nd.dentry->d_inode->i_sem);
+        mutex_unlock(&nd.dentry->d_inode->i_mutex);
        path_release(&nd);
 out:
        putname(tmp);
@@ -1792,6 +1842,11 @@ out:
        return error;
 }
+asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
+{
+        return sys_mknodat(AT_FDCWD, filename, mode, dev);
+}
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
        int error = may_create(dir, dentry, NULL);
@@ -1814,7 +1869,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        return error;
 }
-asmlinkage long sys_mkdir(const char __user * pathname, int mode)
+asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 {
        int error = 0;
        char * tmp;
@@ -1825,7 +1880,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode)
                struct dentry *dentry;
                struct nameidata nd;
-                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+                error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
                dentry = lookup_create(&nd, 1);
@@ -1836,7 +1891,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode)
                        error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
                        dput(dentry);
                }
-                up(&nd.dentry->d_inode->i_sem);
+                mutex_unlock(&nd.dentry->d_inode->i_mutex);
                path_release(&nd);
 out:
                putname(tmp);
@@ -1845,6 +1900,11 @@ out:
        return error;
 }
+asmlinkage long sys_mkdir(const char __user *pathname, int mode)
+{
+        return sys_mkdirat(AT_FDCWD, pathname, mode);
+}
 /*
 * We try to drop the dentry early: we should have
 * a usage count of 2 if we're the only user of this
@@ -1885,7 +1945,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        DQUOT_INIT(dir);
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        dentry_unhash(dentry);
        if (d_mountpoint(dentry))
                error = -EBUSY;
@@ -1897,7 +1957,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                                dentry->d_inode->i_flags |= S_DEAD;
                }
        }
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        if (!error) {
                d_delete(dentry);
        }
@@ -1906,7 +1966,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        return error;
 }
-asmlinkage long sys_rmdir(const char __user * pathname)
+static long do_rmdir(int dfd, const char __user *pathname)
 {
        int error = 0;
        char * name;
@@ -1917,7 +1977,7 @@ asmlinkage long sys_rmdir(const char __user * pathname)
        if(IS_ERR(name))
                return PTR_ERR(name);
-        error = path_lookup(name, LOOKUP_PARENT, &nd);
+        error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
        if (error)
                goto exit;
@@ -1932,14 +1992,14 @@ asmlinkage long sys_rmdir(const char __user * pathname)
                        error = -EBUSY;
                        goto exit1;
        }
-        down(&nd.dentry->d_inode->i_sem);
+        mutex_lock(&nd.dentry->d_inode->i_mutex);
        dentry = lookup_hash(&nd);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
                dput(dentry);
        }
-        up(&nd.dentry->d_inode->i_sem);
+        mutex_unlock(&nd.dentry->d_inode->i_mutex);
 exit1:
        path_release(&nd);
 exit:
@@ -1947,6 +2007,11 @@ exit:
        return error;
 }
+/*
+ * sys_rmdir() forwards to do_rmdir(), passing AT_FDCWD as the directory
+ * descriptor; the result of do_rmdir() is returned as is.
+ */
+asmlinkage long sys_rmdir(const char __user *pathname)
+{
+        return do_rmdir(AT_FDCWD, pathname);
+}
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error = may_delete(dir, dentry, 0);
@@ -1959,7 +2024,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
        DQUOT_INIT(dir);
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        if (d_mountpoint(dentry))
                error = -EBUSY;
        else {
@@ -1967,7 +2032,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
                if (!error)
                        error = dir->i_op->unlink(dir, dentry);
        }
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        /* We don't d_delete() NFS sillyrenamed files--they still exist. */
        if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -1979,11 +2044,11 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 /*
 * Make sure that the actual truncation of the file will occur outside its
- * directory's i_sem.  Truncate can take a long time if there is a lot of
+ * directory's i_mutex.  Truncate can take a long time if there is a lot of
 * writeout happening, and we don't want to prevent access to the directory
 * while waiting on the I/O.
 */
-asmlinkage long sys_unlink(const char __user * pathname)
+static long do_unlinkat(int dfd, const char __user *pathname)
 {
        int error = 0;
        char * name;
@@ -1995,13 +2060,13 @@ asmlinkage long sys_unlink(const char __user * pathname)
        if(IS_ERR(name))
                return PTR_ERR(name);
-        error = path_lookup(name, LOOKUP_PARENT, &nd);
+        error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
        if (error)
                goto exit;
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-        down(&nd.dentry->d_inode->i_sem);
+        mutex_lock(&nd.dentry->d_inode->i_mutex);
        dentry = lookup_hash(&nd);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
@@ -2015,7 +2080,7 @@ asmlinkage long sys_unlink(const char __user * pathname)
        exit2:
                dput(dentry);
        }
-        up(&nd.dentry->d_inode->i_sem);
+        mutex_unlock(&nd.dentry->d_inode->i_mutex);
        if (inode)
                iput(inode);    /* truncate the inode here */
 exit1:
@@ -2030,6 +2095,22 @@ slashes:
        goto exit2;
 }
+/*
+ * sys_unlinkat() accepts AT_REMOVEDIR as its only flag bit; any other bit
+ * yields -EINVAL.  With AT_REMOVEDIR set the request is handed to
+ * do_rmdir(), otherwise to do_unlinkat(), both relative to dfd.
+ */
+asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag)
+{
+        /* Reject every bit other than AT_REMOVEDIR. */
+        if (flag & ~AT_REMOVEDIR)
+                return -EINVAL;
+        return (flag & AT_REMOVEDIR) ? do_rmdir(dfd, pathname)
+                                     : do_unlinkat(dfd, pathname);
+}
+/*
+ * sys_unlink() forwards to do_unlinkat(), passing AT_FDCWD as the
+ * directory descriptor; the result of do_unlinkat() is returned as is.
+ */
+asmlinkage long sys_unlink(const char __user *pathname)
+{
+        return do_unlinkat(AT_FDCWD, pathname);
+}
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
        int error = may_create(dir, dentry, NULL);
@@ -2051,7 +2132,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
        return error;
 }
-asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname)
+asmlinkage long sys_symlinkat(const char __user *oldname,
+                              int newdfd, const char __user *newname)
 {
        int error = 0;
        char * from;
@@ -2066,7 +2148,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
                struct dentry *dentry;
                struct nameidata nd;
-                error = path_lookup(to, LOOKUP_PARENT, &nd);
+                error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
                dentry = lookup_create(&nd, 0);
@@ -2075,7 +2157,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
                        error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
                        dput(dentry);
                }
-                up(&nd.dentry->d_inode->i_sem);
+                mutex_unlock(&nd.dentry->d_inode->i_mutex);
                path_release(&nd);
 out:
                putname(to);
@@ -2084,6 +2166,11 @@ out:
        return error;
 }
+/*
+ * sys_symlink() forwards to sys_symlinkat(), passing AT_FDCWD as the
+ * directory descriptor for newname; oldname is handed through unchanged.
+ */
+asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname)
+{
+        return sys_symlinkat(oldname, AT_FDCWD, newname);
+}
 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
 {
        struct inode *inode = old_dentry->d_inode;
@@ -2113,10 +2200,10 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (error)
                return error;
-        down(&old_dentry->d_inode->i_sem);
+        mutex_lock(&old_dentry->d_inode->i_mutex);
        DQUOT_INIT(dir);
        error = dir->i_op->link(old_dentry, dir, new_dentry);
-        up(&old_dentry->d_inode->i_sem);
+        mutex_unlock(&old_dentry->d_inode->i_mutex);
        if (!error)
                fsnotify_create(dir, new_dentry->d_name.name);
        return error;
@@ -2131,7 +2218,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 * with linux 2.0, and to avoid hard-linking to directories
 * and other special files.  --ADM
 */
-asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
+asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
+                           int newdfd, const char __user *newname)
 {
        struct dentry *new_dentry;
        struct nameidata nd, old_nd;
@@ -2142,10 +2230,10 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
        if (IS_ERR(to))
                return PTR_ERR(to);
-        error = __user_walk(oldname, 0, &old_nd);
+        error = __user_walk_fd(olddfd, oldname, 0, &old_nd);
        if (error)
                goto exit;
-        error = path_lookup(to, LOOKUP_PARENT, &nd);
+        error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
        error = -EXDEV;
@@ -2157,7 +2245,7 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
                error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
                dput(new_dentry);
        }
-        up(&nd.dentry->d_inode->i_sem);
+        mutex_unlock(&nd.dentry->d_inode->i_mutex);
 out_release:
        path_release(&nd);
 out:
@@ -2168,6 +2256,11 @@ exit:
        return error;
 }
+/*
+ * sys_link() forwards to sys_linkat(), passing AT_FDCWD as the directory
+ * descriptor for both oldname and newname.
+ */
+asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
+{
+        return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname);
+}
 /*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
@@ -2178,7 +2271,7 @@ exit:
 *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
 *         story.
 *      c) we have to lock _three_ objects - parents and victim (if it exists).
- *         And that - after we got ->i_sem on parents (until then we don't know
+ *         And that - after we got ->i_mutex on parents (until then we don't know
 *         whether the target exists).  Solution: try to be smart with locking
 *         order for inodes.  We rely on the fact that tree topology may change
 *         only under ->s_vfs_rename_sem _and_ that parent of the object we
@@ -2195,9 +2288,9 @@ exit:
 *         stuff into VFS), but the former is not going away. Solution: the same
 *         trick as in rmdir().
 *      e) conversion from fhandle to dentry may come in the wrong moment - when
- *         we are removing the target. Solution: we will have to grab ->i_sem
+ *         we are removing the target. Solution: we will have to grab ->i_mutex
 *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
- *         ->i_sem on parents, which works but leads to some truely excessive
+ *         ->i_mutex on parents, which works but leads to some truly excessive
 *         locking].
 */
 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -2222,7 +2315,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
        target = new_dentry->d_inode;
        if (target) {
-                down(&target->i_sem);
+                mutex_lock(&target->i_mutex);
                dentry_unhash(new_dentry);
        }
        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
@@ -2232,7 +2325,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
        if (target) {
                if (!error)
                        target->i_flags |= S_DEAD;
-                up(&target->i_sem);
+                mutex_unlock(&target->i_mutex);
                if (d_unhashed(new_dentry))
                        d_rehash(new_dentry);
                dput(new_dentry);
@@ -2255,7 +2348,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
        dget(new_dentry);
        target = new_dentry->d_inode;
        if (target)
-                down(&target->i_sem);
+                mutex_lock(&target->i_mutex);
        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                error = -EBUSY;
        else
@@ -2266,7 +2359,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                        d_move(old_dentry, new_dentry);
        }
        if (target)
-                up(&target->i_sem);
+                mutex_unlock(&target->i_mutex);
        dput(new_dentry);
        return error;
 }
@@ -2314,7 +2407,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        return error;
 }
-static inline int do_rename(const char * oldname, const char * newname)
+static int do_rename(int olddfd, const char *oldname,
+                        int newdfd, const char *newname)
 {
        int error = 0;
        struct dentry * old_dir, * new_dir;
@@ -2322,11 +2416,11 @@ static inline int do_rename(const char * oldname, const char * newname)
        struct dentry * trap;
        struct nameidata oldnd, newnd;
-        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
+        error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
        if (error)
                goto exit;
-        error = path_lookup(newname, LOOKUP_PARENT, &newnd);
+        error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
        if (error)
                goto exit1;
@@ -2390,7 +2484,8 @@ exit:
        return error;
 }
-asmlinkage long sys_rename(const char __user * oldname, const char __user * newname)
+asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
+                             int newdfd, const char __user *newname)
 {
        int error;
        char * from;
@@ -2402,13 +2497,18 @@ asmlinkage long sys_rename(const char __user * oldname, const char __user * newn
        to = getname(newname);
        error = PTR_ERR(to);
        if (!IS_ERR(to)) {
-                error = do_rename(from,to);
+                error = do_rename(olddfd, from, newdfd, to);
                putname(to);
        }
        putname(from);
        return error;
 }
+/*
+ * sys_rename() forwards to sys_renameat(), passing AT_FDCWD as the
+ * directory descriptor for both oldname and newname.
+ */
+asmlinkage long sys_rename(const char __user *oldname, const char __user *newname)
+{
+        return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
+}
 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
 {
        int len;
@@ -2552,6 +2652,7 @@ struct inode_operations page_symlink_inode_operations = {
 };
 EXPORT_SYMBOL(__user_walk);
+EXPORT_SYMBOL(__user_walk_fd);
 EXPORT_SYMBOL(follow_down);
 EXPORT_SYMBOL(follow_up);
 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
diff --git a/fs/namespace.c b/fs/namespace.c
index 2019899f2ab8..ce97becff461 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/quotaops.h>
 #include <linux/acct.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/namespace.h>
@@ -47,6 +48,10 @@ static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache;
 static struct rw_semaphore namespace_sem;
+/* /sys/fs */
+decl_subsys(fs, NULL, NULL);
+EXPORT_SYMBOL_GPL(fs_subsys);
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
        unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -355,14 +360,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                { MS_SYNCHRONOUS, ",sync" },
                { MS_DIRSYNC, ",dirsync" },
                { MS_MANDLOCK, ",mand" },
-                { MS_NOATIME, ",noatime" },
-                { MS_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        static struct proc_fs_info mnt_info[] = {
                { MNT_NOSUID, ",nosuid" },
                { MNT_NODEV, ",nodev" },
                { MNT_NOEXEC, ",noexec" },
+                { MNT_NOATIME, ",noatime" },
+                { MNT_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        struct proc_fs_info *fs_infop;
@@ -451,7 +456,7 @@ EXPORT_SYMBOL(may_umount);
 void release_mounts(struct list_head *head)
 {
        struct vfsmount *mnt;
-        while(!list_empty(head)) {
+        while (!list_empty(head)) {
                mnt = list_entry(head->next, struct vfsmount, mnt_hash);
                list_del_init(&mnt->mnt_hash);
                if (mnt->mnt_parent != mnt) {
@@ -814,7 +819,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
                return -ENOTDIR;
        err = -ENOENT;
-        down(&nd->dentry->d_inode->i_sem);
+        mutex_lock(&nd->dentry->d_inode->i_mutex);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out_unlock;
@@ -826,7 +831,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
        if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
                err = attach_recursive_mnt(mnt, nd, NULL);
 out_unlock:
-        up(&nd->dentry->d_inode->i_sem);
+        mutex_unlock(&nd->dentry->d_inode->i_mutex);
        if (!err)
                security_sb_post_addmount(mnt, nd);
        return err;
@@ -962,7 +967,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
                goto out;
        err = -ENOENT;
-        down(&nd->dentry->d_inode->i_sem);
+        mutex_lock(&nd->dentry->d_inode->i_mutex);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out1;
@@ -1004,7 +1009,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
        list_del_init(&old_nd.mnt->mnt_expire);
        spin_unlock(&vfsmount_lock);
 out1:
-        up(&nd->dentry->d_inode->i_sem);
+        mutex_unlock(&nd->dentry->d_inode->i_mutex);
 out:
        up_write(&namespace_sem);
        if (!err)
@@ -1286,7 +1291,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                mnt_flags |= MNT_NODEV;
        if (flags & MS_NOEXEC)
                mnt_flags |= MNT_NOEXEC;
-        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);
+        if (flags & MS_NOATIME)
+                mnt_flags |= MNT_NOATIME;
+        if (flags & MS_NODIRATIME)
+                mnt_flags |= MNT_NODIRATIME;
+        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
+                   MS_NOATIME | MS_NODIRATIME);
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1526,6 +1537,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * in this situation.
+ *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
@@ -1569,7 +1584,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        user_nd.dentry = dget(current->fs->root);
        read_unlock(&current->fs->lock);
        down_write(&namespace_sem);
-        down(&old_nd.dentry->d_inode->i_sem);
+        mutex_lock(&old_nd.dentry->d_inode->i_mutex);
        error = -EINVAL;
        if (IS_MNT_SHARED(old_nd.mnt) ||
                IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
@@ -1622,7 +1637,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        path_release(&root_parent);
        path_release(&parent_nd);
 out2:
-        up(&old_nd.dentry->d_inode->i_sem);
+        mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
        up_write(&namespace_sem);
        path_release(&user_nd);
        path_release(&old_nd);
@@ -1714,6 +1729,7 @@ void __init mnt_init(unsigned long mempages)
                i--;
        } while (i);
        sysfs_init();
+        subsystem_register(&fs_subsys);
        init_rootfs();
        init_mount_tree();
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index a9f7a8ab1d59..cfd76f431dc0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-                dent = list_entry(next, struct dentry, d_child);
+                dent = list_entry(next, struct dentry, d_u.d_child);
                if ((unsigned long)dent->d_fsdata == fpos) {
                        if (dent->d_inode)
                                dget_locked(dent);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 4947d9b11fc1..973b444d6914 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -262,7 +262,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
        }
        vfree(bouncebuffer);
-        inode_update_time(inode, 1);
+        file_update_time(file);
        *ppos = pos;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8c8839203cd5..d277a58bd128 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -716,10 +716,8 @@ static void ncp_put_super(struct super_block *sb)
        fput(server->ncp_filp);
        kill_proc(server->m.wdog_pid, SIGTERM, 1);
-        if (server->priv.data) 
+        kfree(server->priv.data);
-                ncp_kfree_s(server->priv.data, server->priv.len);
+        kfree(server->auth.object_name);
-        if (server->auth.object_name)
-                ncp_kfree_s(server->auth.object_name, server->auth.object_name_len);
        vfree(server->packet);
        sb->s_fs_info = NULL;
        kfree(server);
@@ -958,11 +956,6 @@ out:
        return result;
 }
-#ifdef DEBUG_NCP_MALLOC
-int ncp_malloced;
-int ncp_current_malloced;
-#endif
 static struct super_block *ncp_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
@@ -981,10 +974,6 @@ static int __init init_ncp_fs(void)
        int err;
        DPRINTK("ncpfs: init_module called\n");
-#ifdef DEBUG_NCP_MALLOC
-        ncp_malloced = 0;
-        ncp_current_malloced = 0;
-#endif
        err = init_inodecache();
        if (err)
                goto out1;
@@ -1003,10 +992,6 @@ static void __exit exit_ncp_fs(void)
        DPRINTK("ncpfs: cleanup_module called\n");
        unregister_filesystem(&ncp_fs_type);
        destroy_inodecache();
-#ifdef DEBUG_NCP_MALLOC
-        PRINTK("ncp_malloced: %d\n", ncp_malloced);
-        PRINTK("ncp_current_malloced: %d\n", ncp_current_malloced);
-#endif
 }
 module_init(init_ncp_fs)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index fd3efdca5ae3..eb3813ad136f 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -10,6 +10,7 @@
 #include <linux/config.h>
 #include <asm/uaccess.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/ioctl.h>
@@ -517,10 +518,11 @@ outrel:
                        if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
                                return -ENOMEM;
                        if (user.object_name_len) {
-                                newname = ncp_kmalloc(user.object_name_len, GFP_USER);
+                                newname = kmalloc(user.object_name_len, GFP_USER);
-                                if (!newname) return -ENOMEM;
+                                if (!newname)
+                                        return -ENOMEM;
                                if (copy_from_user(newname, user.object_name, user.object_name_len)) {
-                                        ncp_kfree_s(newname, user.object_name_len);
+                                        kfree(newname);
                                        return -EFAULT;
                                }
                        } else {
@@ -539,8 +541,8 @@ outrel:
                        server->priv.len = 0;
                        server->priv.data = NULL;
                        /* leave critical section */
-                        if (oldprivate) ncp_kfree_s(oldprivate, oldprivatelen);
+                        kfree(oldprivate);
-                        if (oldname) ncp_kfree_s(oldname, oldnamelen);
+                        kfree(oldname);
                        return 0;
                }
        case NCP_IOC_GETPRIVATEDATA:
@@ -580,10 +582,11 @@ outrel:
                        if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
                                return -ENOMEM;
                        if (user.len) {
-                                new = ncp_kmalloc(user.len, GFP_USER);
+                                new = kmalloc(user.len, GFP_USER);
-                                if (!new) return -ENOMEM;
+                                if (!new)
+                                        return -ENOMEM;
                                if (copy_from_user(new, user.data, user.len)) {
-                                        ncp_kfree_s(new, user.len);
+                                        kfree(new);
                                        return -EFAULT;
                                }
                        } else {
@@ -595,7 +598,7 @@ outrel:
                        server->priv.len = user.len;
                        server->priv.data = new;
                        /* leave critical section */
-                        if (old) ncp_kfree_s(old, oldlen);
+                        kfree(old);
                        return 0;
                }
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 9e4dc30c2435..799e5c2bec55 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-                dentry = list_entry(next, struct dentry, d_child);
+                dentry = list_entry(next, struct dentry, d_u.d_child);
                if (dentry->d_fsdata == NULL)
                        ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-                dentry = list_entry(next, struct dentry, d_child);
+                dentry = list_entry(next, struct dentry, d_u.d_child);
                dentry->d_fsdata = NULL;
                ncp_age_dentry(server, dentry);
                next = next->next;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e9255198f767..a1554bead692 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -194,7 +194,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
        spin_unlock(&inode->i_lock);
        /* Ensure consistent page alignment of the data.
         * Note: assumes we have exclusive access to this mapping either
-         *       through inode->i_sem or some other mechanism.
+         *       through inode->i_mutex or some other mechanism.
         */
        if (page->index == 0)
                invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
@@ -573,7 +573,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 {
-        down(&filp->f_dentry->d_inode->i_sem);
+        mutex_lock(&filp->f_dentry->d_inode->i_mutex);
        switch (origin) {
                case 1:
                        offset += filp->f_pos;
@@ -589,7 +589,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
                ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
        }
 out:
-        up(&filp->f_dentry->d_inode->i_sem);
+        mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
        return offset;
 }
@@ -1001,7 +1001,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        openflags &= ~(O_CREAT|O_TRUNC);
        /*
-         * Note: we're not holding inode->i_sem and so may be racing with
+         * Note: we're not holding inode->i_mutex and so may be racing with
         * operations that change the directory. We therefore save the
         * change attribute *before* we do the RPC call.
         */
@@ -1051,7 +1051,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
                return dentry;
        if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
                return NULL;
-        /* Note: caller is already holding the dir->i_sem! */
+        /* Note: caller is already holding the dir->i_mutex! */
        dentry = d_alloc(parent, &name);
        if (dentry == NULL)
                return NULL;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e7bd0d92600f..a77ee95b7efb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping)
        if (mapping->nrpages == 0)
                return 0;
        unmap_mapping_range(mapping, 0, 0, 0);
-        ret = filemap_fdatawrite(mapping);
+        ret = filemap_write_and_wait(mapping);
-        if (ret != 0)
-                goto out;
-        ret = filemap_fdatawait(mapping);
        if (ret != 0)
                goto out;
        ret = nfs_wb_all(mapping->host);
@@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
        nfs_begin_data_update(inode);
        /* Write all dirty data if we're changing file permissions or size */
        if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-                if (filemap_fdatawrite(inode->i_mapping) == 0)
+                filemap_write_and_wait(inode->i_mapping);
-                        filemap_fdatawait(inode->i_mapping);
                nfs_wb_all(inode);
        }
        /*
@@ -954,11 +950,20 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        /* Flush out writes to the server in order to update c/mtime */
        nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
-        if (__IS_FLG(inode, MS_NOATIME))
-                need_atime = 0;
+        /*
-        else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
+         * We may force a getattr if the user cares about atime.
+         *
+         * Note that we only have to check the vfsmount flags here:
+         *  - NFS always sets S_NOATIME, so checking it would give a
+         *    bogus result
+         *  - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
+         *    no point in checking those.
+         */
+        if ((mnt->mnt_flags & MNT_NOATIME) ||
+            ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
                need_atime = 0;
-        /* We may force a getattr if the user cares about atime */
        if (need_atime)
                err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
        else
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 985cc53b8dd5..e897e00c2c9d 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf)
                        case Opt_noacl:
                                nfs_data.flags |= NFS_MOUNT_NOACL;
                                break;
-                        default : 
+                        default:
+                                printk(KERN_WARNING "Root-NFS: unknown "
+                                        "option: %s\n", p);
                                return 0;
                }
        }
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 0b14938b5b62..0d4cf9486068 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -5,6 +5,7 @@
 *
 */
 #include <linux/config.h>
+#include <linux/types.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sunrpc/svc.h>
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 361b4007d4a0..a00fe8686293 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -192,6 +192,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
        }
        if (status)
                goto out;
+        /* Openowner is now set, so sequence id will get bumped.  Now we need
+         * these checks before we do any creates.  Leave through "out", as the
+         * neighbouring error paths do, instead of returning directly: */
+        if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) {
+                status = nfserr_grace;
+                goto out;
+        }
+        if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) {
+                status = nfserr_no_grace;
+                goto out;
+        }
        switch (open->op_claim_type) {
                case NFS4_OPEN_CLAIM_DELEGATE_CUR:
                        status = nfserr_inval;
@@ -210,6 +218,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
                                goto out;
                        break;
                case NFS4_OPEN_CLAIM_PREVIOUS:
+                        open->op_stateowner->so_confirmed = 1;
                        /*
                         * The CURRENT_FH is already set to the file being
                         * opened.  (1) set open->op_cinfo, (2) set
@@ -221,6 +230,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
                                goto out;
                        break;
                case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+                        open->op_stateowner->so_confirmed = 1;
                        printk("NFSD: unsupported OPEN claim type %d\n",
                                open->op_claim_type);
                        status = nfserr_notsupp;
@@ -584,31 +594,23 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
 {
        int status = nfs_ok;
-        if (!current_fh->fh_dentry)
-                return nfserr_nofilehandle;
-        status = nfs_ok;
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
                nfs4_lock_state();
-                if ((status = nfs4_preprocess_stateid_op(current_fh,
+                status = nfs4_preprocess_stateid_op(current_fh,
-                                                &setattr->sa_stateid,
+                        &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
-                                                CHECK_FH | WR_STATE, NULL))) {
-                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
-                        goto out_unlock;
-                }
                nfs4_unlock_state();
+                if (status) {
+                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+                        return status;
+                }
        }
        status = nfs_ok;
        if (setattr->sa_acl != NULL)
                status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
        if (status)
-                goto out;
+                return status;
        status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
                                0, (time_t)0);
-out:
-        return status;
-out_unlock:
-        nfs4_unlock_state();
        return status;
 }
@@ -626,15 +628,17 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
                return nfserr_inval;
        nfs4_lock_state();
-        if ((status = nfs4_preprocess_stateid_op(current_fh, stateid,
+        status = nfs4_preprocess_stateid_op(current_fh, stateid,
-                                        CHECK_FH | WR_STATE, &filp))) {
+                                        CHECK_FH | WR_STATE, &filp);
-                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
-                goto out;
-        }
        if (filp)
                get_file(filp);
        nfs4_unlock_state();
+        if (status) {
+                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+                return status;
+        }
        write->wr_bytes_written = write->wr_buflen;
        write->wr_how_written = write->wr_stable_how;
        p = (u32 *)write->wr_verifier.data;
@@ -650,9 +654,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
        if (status == nfserr_symlink)
                status = nfserr_inval;
        return status;
-out:
-        nfs4_unlock_state();
-        return status;
 }
 /* This routine never returns NFS_OK!  If there are no other errors, it
@@ -768,6 +769,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
        while (!status && resp->opcnt < args->opcnt) {
                op = &args->ops[resp->opcnt++];
+                dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
                /*
                 * The XDR decode routines may have pre-set op->status;
                 * for example, if there is a miscellaneous XDR error
@@ -792,17 +795,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
                /* All operations except RENEW, SETCLIENTID, RESTOREFH
                * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
                * require a valid current filehandle
-                *
-                * SETATTR NOFILEHANDLE error handled in nfsd4_setattr
-                * due to required returned bitmap argument
                */
                if ((!current_fh->fh_dentry) &&
                   !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
                   (op->opnum == OP_SETCLIENTID) ||
                   (op->opnum == OP_SETCLIENTID_CONFIRM) ||
                   (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
-                   (op->opnum == OP_RELEASE_LOCKOWNER) ||
+                   (op->opnum == OP_RELEASE_LOCKOWNER))) {
-                   (op->opnum == OP_SETATTR))) {
                        op->status = nfserr_nofilehandle;
                        goto encode_op;
                }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 954cf893d50c..06da7506363c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -121,9 +121,9 @@ out:
 static void
 nfsd4_sync_rec_dir(void)
 {
-        down(&rec_dir.dentry->d_inode->i_sem);
+        mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
        nfsd_sync_dir(rec_dir.dentry);
-        up(&rec_dir.dentry->d_inode->i_sem);
+        mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
 }
 int
@@ -143,7 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
        nfs4_save_user(&uid, &gid);
        /* lock the parent */
-        down(&rec_dir.dentry->d_inode->i_sem);
+        mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
        dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
        if (IS_ERR(dentry)) {
@@ -159,7 +159,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_put:
        dput(dentry);
 out_unlock:
-        up(&rec_dir.dentry->d_inode->i_sem);
+        mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
        if (status == 0) {
                clp->cl_firststate = 1;
                nfsd4_sync_rec_dir();
@@ -222,8 +222,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
        nfs4_save_user(&uid, &gid);
-        filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
+        filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
-                        O_RDWR);
        status = PTR_ERR(filp);
        if (IS_ERR(filp))
                goto out;
@@ -259,9 +258,9 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
                printk("nfsd4: non-file found in client recovery directory\n");
                return -EINVAL;
        }
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        status = vfs_unlink(dir->d_inode, dentry);
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return status;
 }
@@ -274,9 +273,9 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
         * any regular files anyway, just in case the directory was created by
         * a kernel from the future.... */
        nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        status = vfs_rmdir(dir->d_inode, dentry);
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return status;
 }
@@ -288,9 +287,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
-        down(&rec_dir.dentry->d_inode->i_sem);
+        mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
        dentry = lookup_one_len(name, rec_dir.dentry, namlen);
-        up(&rec_dir.dentry->d_inode->i_sem);
+        mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
                return status;
@@ -400,9 +399,10 @@ nfsd4_init_recdir(char *rec_dirname)
        nfs4_save_user(&uid, &gid);
-        status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
+        status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
-        if (status == -ENOENT)
+                        &rec_dir);
-                printk("NFSD: recovery directory %s doesn't exist\n",
+        if (status)
+                printk("NFSD: unable to find recovery directory %s\n",
                                rec_dirname);
        if (!status)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6bbefd06f10d..1143cfb64549 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1088,7 +1088,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
        sop->so_seqid = open->op_seqid;
        sop->so_confirmed = 0;
        rp = &sop->so_replay;
-        rp->rp_status = NFSERR_SERVERFAULT;
+        rp->rp_status = nfserr_serverfault;
        rp->rp_buflen = 0;
        rp->rp_buf = rp->rp_ibuf;
        return sop;
@@ -1178,7 +1178,6 @@ release_stateid(struct nfs4_stateid *stp, int flags)
                locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
        put_nfs4_file(stp->st_file);
        kmem_cache_free(stateid_slab, stp);
-        stp = NULL;
 }
 static void
@@ -1191,22 +1190,6 @@ move_to_close_lru(struct nfs4_stateowner *sop)
        sop->so_time = get_seconds();
 }
-static void
-release_state_owner(struct nfs4_stateid *stp, int flag)
-{
-        struct nfs4_stateowner *sop = stp->st_stateowner;
-        dprintk("NFSD: release_state_owner\n");
-        release_stateid(stp, flag);
-        /* place unused nfs4_stateowners on so_close_lru list to be
-         * released by the laundromat service after the lease period
-         * to enable us to handle CLOSE replay
-         */
-        if (sop->so_confirmed && list_empty(&sop->so_stateids))
-                move_to_close_lru(sop);
-}
 static int
 cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) {
        return ((sop->so_owner.len == owner->len) && 
@@ -1446,92 +1429,61 @@ static struct lock_manager_operations nfsd_lease_mng_ops = {
 };
-/*
- * nfsd4_process_open1()
- *      lookup stateowner.
- *              found:
- *                      check confirmed 
- *                              confirmed:
- *                                      check seqid
- *                              not confirmed:
- *                                      delete owner
- *                                      create new owner
- *              notfound:
- *                      verify clientid
- *                      create new owner
- *
- * called with nfs4_lock_state() held.
- */
 int
 nfsd4_process_open1(struct nfsd4_open *open)
 {
-        int status;
        clientid_t *clientid = &open->op_clientid;
        struct nfs4_client *clp = NULL;
        unsigned int strhashval;
        struct nfs4_stateowner *sop = NULL;
-        status = nfserr_inval;
        if (!check_name(open->op_owner))
-                goto out;
+                return nfserr_inval;
        if (STALE_CLIENTID(&open->op_clientid))
                return nfserr_stale_clientid;
        strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
        sop = find_openstateowner_str(strhashval, open);
-        if (sop) {
+        open->op_stateowner = sop;
-                open->op_stateowner = sop;
+        if (!sop) {
-                /* check for replay */
+                /* Make sure the client's lease hasn't expired. */
-                if (open->op_seqid == sop->so_seqid - 1){
-                        if (sop->so_replay.rp_buflen)
-                                return NFSERR_REPLAY_ME;
-                        else {
-                                /* The original OPEN failed so spectacularly
-                                 * that we don't even have replay data saved!
-                                 * Therefore, we have no choice but to continue
-                                 * processing this OPEN; presumably, we'll
-                                 * fail again for the same reason.
-                                 */
-                                dprintk("nfsd4_process_open1:"
-                                        " replay with no replay cache\n");
-                                goto renew;
-                        }
-                } else if (sop->so_confirmed) {
-                        if (open->op_seqid == sop->so_seqid)
-                                goto renew;
-                        status = nfserr_bad_seqid;
-                        goto out;
-                } else {
-                        /* If we get here, we received an OPEN for an
-                         * unconfirmed nfs4_stateowner. Since the seqid's are
-                         * different, purge the existing nfs4_stateowner, and
-                         * instantiate a new one.
-                         */
-                        clp = sop->so_client;
-                        release_stateowner(sop);
-                }
-        } else {
-                /* nfs4_stateowner not found.
-                 * Verify clientid and instantiate new nfs4_stateowner.
-                 * If verify fails this is presumably the result of the
-                 * client's lease expiring.
-                 */
-                status = nfserr_expired;
                clp = find_confirmed_client(clientid);
                if (clp == NULL)
-                        goto out;
+                        return nfserr_expired;
+                goto renew;
        }
-        status = nfserr_resource;
+        if (!sop->so_confirmed) {
-        sop = alloc_init_open_stateowner(strhashval, clp, open);
+                /* Replace unconfirmed owners without checking for replay. */
-        if (sop == NULL)
+                clp = sop->so_client;
-                goto out;
+                release_stateowner(sop);
-        open->op_stateowner = sop;
+                open->op_stateowner = NULL;
+                goto renew;
+        }
+        if (open->op_seqid == sop->so_seqid - 1) {
+                if (sop->so_replay.rp_buflen)
+                        return NFSERR_REPLAY_ME;
+                /* The original OPEN failed so spectacularly
+                 * that we don't even have replay data saved!
+                 * Therefore, we have no choice but to continue
+                 * processing this OPEN; presumably, we'll
+                 * fail again for the same reason.
+                 */
+                dprintk("nfsd4_process_open1: replay with no replay cache\n");
+                goto renew;
+        }
+        if (open->op_seqid != sop->so_seqid)
+                return nfserr_bad_seqid;
 renew:
-        status = nfs_ok;
+        if (open->op_stateowner == NULL) {
+                sop = alloc_init_open_stateowner(strhashval, clp, open);
+                if (sop == NULL)
+                        return nfserr_resource;
+                open->op_stateowner = sop;
+        }
+        list_del_init(&sop->so_close_lru);
        renew_client(sop->so_client);
-out:
+        return nfs_ok;
-        return status;
 }
 static inline int
@@ -1648,7 +1600,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
        if (!open->op_truncate)
                return 0;
        if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
-                return -EINVAL;
+                return nfserr_inval;
        return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
 }
@@ -1657,26 +1609,26 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
 {
        struct file *filp = stp->st_vfs_file;
        struct inode *inode = filp->f_dentry->d_inode;
-        unsigned int share_access;
+        unsigned int share_access, new_writer;
        int status;
        set_access(&share_access, stp->st_access_bmap);
-        share_access = ~share_access;
+        new_writer = (~share_access) & open->op_share_access
-        share_access &= open->op_share_access;
+                        & NFS4_SHARE_ACCESS_WRITE;
-        if (!(share_access & NFS4_SHARE_ACCESS_WRITE))
-                return nfsd4_truncate(rqstp, cur_fh, open);
-        status = get_write_access(inode);
+        if (new_writer) {
-        if (status)
+                status = get_write_access(inode);
-                return nfserrno(status);
+                if (status)
+                        return nfserrno(status);
+        }
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status) {
-                put_write_access(inode);
+                if (new_writer)
+                        put_write_access(inode);
                return status;
        }
        /* remember the open */
-        filp->f_mode = (filp->f_mode | FMODE_WRITE) & ~FMODE_READ;
+        filp->f_mode |= open->op_share_access;
        set_bit(open->op_share_access, &stp->st_access_bmap);
        set_bit(open->op_share_deny, &stp->st_deny_bmap);
@@ -1780,12 +1732,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
        struct nfs4_delegation *dp = NULL;
        int status;
-        if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
-                return nfserr_grace;
-        if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
-                return nfserr_no_grace;
        status = nfserr_inval;
        if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
                goto out;
@@ -2423,15 +2369,19 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
                                        CHECK_FH | OPEN_STATE | CLOSE_STATE,
                                        &close->cl_stateowner, &stp, NULL)))
                goto out; 
-        /*
-        *  Return success, but first update the stateid.
-        */
        status = nfs_ok;
        update_stateid(&stp->st_stateid);
        memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
-        /* release_state_owner() calls nfsd_close() if needed */
+        /* release_stateid() calls nfsd_close() if needed */
-        release_state_owner(stp, OPEN_STATE);
+        release_stateid(stp, OPEN_STATE);
+        /* place unused nfs4_stateowners on so_close_lru list to be
+         * released by the laundromat service after the lease period
+         * to enable us to handle CLOSE replay
+         */
+        if (list_empty(&close->cl_stateowner->so_stateids))
+                move_to_close_lru(close->cl_stateowner);
 out:
        if (close->cl_stateowner) {
                nfs4_get_stateowner(close->cl_stateowner);
@@ -2633,7 +2583,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
        sop->so_seqid = lock->lk_new_lock_seqid + 1;
        sop->so_confirmed = 1;
        rp = &sop->so_replay;
-        rp->rp_status = NFSERR_SERVERFAULT;
+        rp->rp_status = nfserr_serverfault;
        rp->rp_buflen = 0;
        rp->rp_buf = rp->rp_ibuf;
        return sop;
@@ -2700,6 +2650,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        if (check_lock_length(lock->lk_offset, lock->lk_length))
                 return nfserr_inval;
+        if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
+                dprintk("NFSD: nfsd4_lock: permission denied!\n");
+                return status;
+        }
        nfs4_lock_state();
        if (lock->lk_is_new) {
@@ -2720,11 +2675,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                                        lock->lk_new_open_seqid,
                                        &lock->lk_new_open_stateid,
                                        CHECK_FH | OPEN_STATE,
-                                        &lock->lk_stateowner, &open_stp,
+                                        &lock->lk_replay_owner, &open_stp,
                                        lock);
                if (status)
                        goto out;
-                open_sop = lock->lk_stateowner;
+                open_sop = lock->lk_replay_owner;
                /* create lockowner and lock stateid */
                fp = open_stp->st_file;
                strhashval = lock_ownerstr_hashval(fp->fi_inode, 
@@ -2739,29 +2694,22 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                if (lock_sop == NULL)
                        goto out;
                lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
-                if (lock_stp == NULL) {
+                if (lock_stp == NULL)
-                        release_stateowner(lock_sop);
                        goto out;
-                }
        } else {
                /* lock (lock owner + lock stateid) already exists */
                status = nfs4_preprocess_seqid_op(current_fh,
                                       lock->lk_old_lock_seqid, 
                                       &lock->lk_old_lock_stateid, 
                                       CHECK_FH | LOCK_STATE, 
-                                       &lock->lk_stateowner, &lock_stp, lock);
+                                       &lock->lk_replay_owner, &lock_stp, lock);
                if (status)
                        goto out;
-                lock_sop = lock->lk_stateowner;
+                lock_sop = lock->lk_replay_owner;
        }
-        /* lock->lk_stateowner and lock_stp have been created or found */
+        /* lock->lk_replay_owner and lock_stp have been created or found */
        filp = lock_stp->st_vfs_file;
-        if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
-                dprintk("NFSD: nfsd4_lock: permission denied!\n");
-                goto out;
-        }
        status = nfserr_grace;
        if (nfs4_in_grace() && !lock->lk_reclaim)
                goto out;
@@ -2802,8 +2750,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        */
        status = posix_lock_file(filp, &file_lock);
-        if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
-                file_lock.fl_ops->fl_release_private(&file_lock);
        dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
        switch (-status) {
        case 0: /* success! */
@@ -2815,9 +2761,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                goto conflicting_lock;
        case (EDEADLK):
                status = nfserr_deadlock;
+                dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
+                goto out;
        default:        
+                status = nfserrno(status);
                dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
-                goto out_destroy_new_stateid;
+                goto out;
        }
 conflicting_lock:
@@ -2831,20 +2780,12 @@ conflicting_lock:
                goto out;
        }
        nfs4_set_lock_denied(conflock, &lock->lk_denied);
-out_destroy_new_stateid:
-        if (lock->lk_is_new) {
-                dprintk("NFSD: nfsd4_lock: destroy new stateid!\n");
-                /*
-                 * An error encountered after instantiation of the new
-                 * stateid has forced us to destroy it.
-                 */
-                release_state_owner(lock_stp, LOCK_STATE);
-        }
 out:
-        if (lock->lk_stateowner) {
+        if (status && lock->lk_is_new && lock_sop)
-                nfs4_get_stateowner(lock->lk_stateowner);
+                release_stateowner(lock_sop);
-                *replay_owner = lock->lk_stateowner;
+        if (lock->lk_replay_owner) {
+                nfs4_get_stateowner(lock->lk_replay_owner);
+                *replay_owner = lock->lk_replay_owner;
        }
        nfs4_unlock_state();
        return status;
@@ -2977,8 +2918,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        *  Try to unlock the file in the VFS.
        */
        status = posix_lock_file(filp, &file_lock); 
-        if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
-                file_lock.fl_ops->fl_release_private(&file_lock);
        if (status) {
                dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
                goto out_nfserr;
@@ -3016,9 +2955,10 @@ check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
        lock_kernel();
        for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
-                if ((*flpp)->fl_owner == (fl_owner_t)lowner)
+                if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
                        status = 1;
                        goto out;
+                }
        }
 out:
        unlock_kernel();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index dcd673186944..69d3501173a8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -528,7 +528,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
 {
        DECODE_HEAD;
-        lock->lk_stateowner = NULL;
+        lock->lk_replay_owner = NULL;
        /*
        * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
        */
@@ -1764,10 +1764,11 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
                 */
                if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
                        goto fail;
-                nfserr = nfserr_toosmall;
                p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
-                if (p == NULL)
+                if (p == NULL) {
+                        nfserr = nfserr_toosmall;
                        goto fail;
+                }
        }
        cd->buflen -= (p - cd->buffer);
        cd->buffer = p;
@@ -1895,7 +1896,6 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
 static void
 nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock)
 {
        ENCODE_SEQID_OP_HEAD;
        if (!nfserr) {
@@ -1906,7 +1906,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock
        } else if (nfserr == nfserr_denied)
                nfsd4_encode_lock_denied(resp, &lock->lk_denied);
-        ENCODE_SEQID_OP_TAIL(lock->lk_stateowner);
+        ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
 }
 static void
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0aa1b9603d7f..3e6b75cd90fd 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -36,6 +36,22 @@ nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
        return nfs_ok;
 }
+/*
+ * Pass a non-zero err through unchanged.  Otherwise fill resp->stat with
+ * vfs_getattr() for the filehandle held in resp->fh and return that
+ * result converted by nfserrno().
+ */
+static int
+nfsd_return_attrs(int err, struct nfsd_attrstat *resp)
+{
+        int host_err;
+        if (err != 0)
+                return err;
+        host_err = vfs_getattr(resp->fh.fh_export->ex_mnt,
+                               resp->fh.fh_dentry,
+                               &resp->stat);
+        return nfserrno(host_err);
+}
+/*
+ * Directory-operation counterpart of the attrstat helper: a non-zero err
+ * is returned as is; otherwise resp->stat is filled by vfs_getattr() for
+ * resp->fh and the nfserrno()-converted result is returned.
+ */
+static int
+nfsd_return_dirop(int err, struct nfsd_diropres *resp)
+{
+        int host_err;
+        if (err != 0)
+                return err;
+        host_err = vfs_getattr(resp->fh.fh_export->ex_mnt,
+                               resp->fh.fh_dentry,
+                               &resp->stat);
+        return nfserrno(host_err);
+}
 /*
 * Get a file's attributes
 * N.B. After this call resp->fh needs an fh_put
@@ -44,10 +60,12 @@ static int
 nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
                                          struct nfsd_attrstat *resp)
 {
+        int nfserr;
        dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
        fh_copy(&resp->fh, &argp->fh);
-        return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+        nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+        return nfsd_return_attrs(nfserr, resp);
 }
 /*
@@ -58,12 +76,14 @@ static int
 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
                                          struct nfsd_attrstat  *resp)
 {
+        int nfserr;
        dprintk("nfsd: SETATTR  %s, valid=%x, size=%ld\n",
                SVCFH_fmt(&argp->fh),
                argp->attrs.ia_valid, (long) argp->attrs.ia_size);
        fh_copy(&resp->fh, &argp->fh);
-        return nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+        nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+        return nfsd_return_attrs(nfserr, resp);
 }
 /*
@@ -86,7 +106,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
                                 &resp->fh);
        fh_put(&argp->fh);
-        return nfserr;
+        return nfsd_return_dirop(nfserr, resp);
 }
 /*
@@ -142,7 +162,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
                                  argp->vec, argp->vlen,
                                  &resp->count);
-        return nfserr;
+        if (nfserr) return nfserr;
+        return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
+                                    resp->fh.fh_dentry,
+                                    &resp->stat));
 }
 /*
@@ -165,7 +188,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
                                   argp->vec, argp->vlen,
                                   argp->len,
                                   &stable);
-        return nfserr;
+        return nfsd_return_attrs(nfserr, resp);
 }
 /*
@@ -322,7 +345,7 @@ out_unlock:
 done:
        fh_put(dirfhp);
-        return nfserr;
+        return nfsd_return_dirop(nfserr, resp);
 }
 static int
@@ -425,7 +448,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
                                    &argp->attrs, S_IFDIR, 0, &resp->fh);
        fh_put(&argp->fh);
-        return nfserr;
+        return nfsd_return_dirop(nfserr, resp);
 }
 /*
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index aa7bb41b293d..e3a0797dd56b 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -37,7 +37,7 @@ static u32	nfs_ftypes[] = {
 /*
 * XDR functions for basic NFS types
 */
-static inline u32 *
+static u32 *
 decode_fh(u32 *p, struct svc_fh *fhp)
 {
        fh_init(fhp, NFS_FHSIZE);
@@ -151,7 +151,7 @@ decode_sattr(u32 *p, struct iattr *iap)
        return p;
 }
-static inline u32 *
+static u32 *
 encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
             struct kstat *stat)
 {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index df4019f04560..5320e5afaddb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -48,8 +48,8 @@
 #include <linux/fsnotify.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
-#ifdef CONFIG_NFSD_V4
 #include <linux/xattr.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
@@ -365,8 +365,30 @@ out_nfserr:
        goto out;
 }
-#if defined(CONFIG_NFSD_V4)
+#if defined(CONFIG_NFSD_V2_ACL) || \
+    defined(CONFIG_NFSD_V3_ACL) || \
+    defined(CONFIG_NFSD_V4)
+/*
+ * Read the extended attribute @key of @dentry into a kmalloc()ed buffer.
+ *
+ * Returns the number of bytes stored in *buf when positive; the caller
+ * then owns *buf and must kfree() it.  On a zero or negative return no
+ * buffer is handed back: *buf is untouched if nothing was allocated, and
+ * reset to NULL if a buffer had to be freed here.
+ */
+static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
+{
+        ssize_t buflen;
+        ssize_t ret;
+        /* Probe with a NULL buffer; the result is used as the allocation size. */
+        buflen = vfs_getxattr(dentry, key, NULL, 0);
+        if (buflen <= 0)
+                return buflen;
+        *buf = kmalloc(buflen, GFP_KERNEL);
+        if (!*buf)
+                return -ENOMEM;
+        ret = vfs_getxattr(dentry, key, *buf, buflen);
+        if (ret <= 0) {
+                /*
+                 * _get_posix_acl() returns without kfree() on a non-positive
+                 * result, so the buffer must be released here or it leaks.
+                 */
+                kfree(*buf);
+                *buf = NULL;
+        }
+        /* Report what was actually read, which may differ from the probed size. */
+        return ret;
+}
+#endif
+#if defined(CONFIG_NFSD_V4)
 static int
 set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
 {
@@ -374,7 +396,6 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
        size_t buflen;
        char *buf = NULL;
        int error = 0;
-        struct inode *inode = dentry->d_inode;
        buflen = posix_acl_xattr_size(pacl->a_count);
        buf = kmalloc(buflen, GFP_KERNEL);
@@ -388,15 +409,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
                goto out;
        }
-        error = -EOPNOTSUPP;
+        error = vfs_setxattr(dentry, key, buf, len, 0);
-        if (inode->i_op && inode->i_op->setxattr) {
-                down(&inode->i_sem);
-                security_inode_setxattr(dentry, key, buf, len, 0);
-                error = inode->i_op->setxattr(dentry, key, buf, len, 0);
-                if (!error)
-                        security_inode_post_setxattr(dentry, key, buf, len, 0);
-                up(&inode->i_sem);
-        }
 out:
        kfree(buf);
        return error;
@@ -455,44 +468,19 @@ out_nfserr:
 static struct posix_acl *
 _get_posix_acl(struct dentry *dentry, char *key)
 {
-        struct inode *inode = dentry->d_inode;
+        void *buf = NULL;
-        char *buf = NULL;
-        int buflen, error = 0;
        struct posix_acl *pacl = NULL;
+        int buflen;
-        error = -EOPNOTSUPP;
+        buflen = nfsd_getxattr(dentry, key, &buf);
-        if (inode->i_op == NULL)
+        if (!buflen)
-                goto out_err;
+                buflen = -ENODATA;
-        if (inode->i_op->getxattr == NULL)
+        if (buflen <= 0)
-                goto out_err;
+                return ERR_PTR(buflen);
-        error = security_inode_getxattr(dentry, key);
-        if (error)
-                goto out_err;
-        buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
-        if (buflen <= 0) {
-                error = buflen < 0 ? buflen : -ENODATA;
-                goto out_err;
-        }
-        buf = kmalloc(buflen, GFP_KERNEL);
-        if (buf == NULL) {
-                error = -ENOMEM;
-                goto out_err;
-        }
-        error = inode->i_op->getxattr(dentry, key, buf, buflen);
-        if (error < 0)
-                goto out_err;
        pacl = posix_acl_from_xattr(buf, buflen);
- out:
        kfree(buf);
        return pacl;
- out_err:
-        pacl = ERR_PTR(error);
-        goto out;
 }
 int
@@ -722,14 +710,15 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
 {
        struct inode *inode = dp->d_inode;
        int (*fsync) (struct file *, struct dentry *, int);
-        int err = nfs_ok;
+        int err;
-        filemap_fdatawrite(inode->i_mapping);
+        err = filemap_fdatawrite(inode->i_mapping);
-        if (fop && (fsync = fop->fsync))
+        if (err == 0 && fop && (fsync = fop->fsync))
-                err=fsync(filp, dp, 0);
+                err = fsync(filp, dp, 0);
-        filemap_fdatawait(inode->i_mapping);
+        if (err == 0)
+                err = filemap_fdatawait(inode->i_mapping);
-        return nfserrno(err);
+        return err;
 }
        
@@ -739,17 +728,17 @@ nfsd_sync(struct file *filp)
        int err;
        struct inode *inode = filp->f_dentry->d_inode;
        dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        err=nfsd_dosync(filp, filp->f_dentry, filp->f_op);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return err;
 }
-void
+int
 nfsd_sync_dir(struct dentry *dp)
 {
-        nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
+        return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
 }
 /*
@@ -826,7 +815,7 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
        return size;
 }
-static inline int
+static int
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
 {
@@ -885,12 +874,12 @@ static void kill_suid(struct dentry *dentry)
        struct iattr    ia;
        ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        notify_change(dentry, &ia);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
 }
-static inline int
+static int
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                loff_t offset, struct kvec *vec, int vlen,
                                unsigned long cnt, int *stablep)
@@ -902,9 +891,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        int                     err = 0;
        int                     stable = *stablep;
+#ifdef MSNFS
        err = nfserr_perm;
-#ifdef MSNFS
        if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
                (!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
                goto out;
@@ -1076,7 +1065,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                return err;
        if (EX_ISSYNC(fhp->fh_export)) {
                if (file->f_op && file->f_op->fsync) {
-                        err = nfsd_sync(file);
+                        err = nfserrno(nfsd_sync(file));
                } else {
                        err = nfserr_notsupp;
                }
@@ -1144,7 +1133,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                "nfsd_create: parent %s/%s not locked!\n",
                                dentry->d_parent->d_name.name,
                                dentry->d_name.name);
-                        err = -EIO;
+                        err = nfserr_io;
                        goto out;
                }
        }
@@ -1187,7 +1176,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out_nfserr;
        if (EX_ISSYNC(fhp->fh_export)) {
-                nfsd_sync_dir(dentry);
+                err = nfserrno(nfsd_sync_dir(dentry));
                write_inode_now(dchild->d_inode, 1);
        }
@@ -1197,9 +1186,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         * send along the gid when it tries to implement setgid
         * directories via NFS.
         */
-        err = 0;
+        if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
-        if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+                int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
-                err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+                if (err2)
+                        err = err2;
+        }
        /*
         * Update the file handle to get the new inode info.
         */
@@ -1318,17 +1309,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out_nfserr;
        if (EX_ISSYNC(fhp->fh_export)) {
-                nfsd_sync_dir(dentry);
+                err = nfserrno(nfsd_sync_dir(dentry));
                /* setattr will sync the child (or not) */
        }
-        /*
-         * Update the filehandle to get the new inode info.
-         */
-        err = fh_update(resfhp);
-        if (err)
-                goto out;
        if (createmode == NFS3_CREATE_EXCLUSIVE) {
                /* Cram the verifier into atime/mtime/mode */
                iap->ia_valid = ATTR_MTIME|ATTR_ATIME
@@ -1349,8 +1333,17 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
         * implement setgid directories via NFS. Clear out all that cruft.
         */
 set_attr:
-        if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0)
+        if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) {
-                err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+                int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+                if (err2)
+                        err = err2;
+        }
+        /*
+         * Update the filehandle to get the new inode info.
+         */
+        if (!err)
+                err = fh_update(resfhp);
 out:
        fh_unlock(fhp);
@@ -1459,10 +1452,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        } else
                err = vfs_symlink(dentry->d_inode, dnew, path, mode);
-        if (!err) {
+        if (!err)
                if (EX_ISSYNC(fhp->fh_export))
-                        nfsd_sync_dir(dentry);
+                        err = nfsd_sync_dir(dentry);
-        } else
+        if (err)
                err = nfserrno(err);
        fh_unlock(fhp);
@@ -1518,7 +1511,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        err = vfs_link(dold, dirp, dnew);
        if (!err) {
                if (EX_ISSYNC(ffhp->fh_export)) {
-                        nfsd_sync_dir(ddir);
+                        err = nfserrno(nfsd_sync_dir(ddir));
                        write_inode_now(dest, 1);
                }
        } else {
@@ -1602,13 +1595,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
                ((atomic_read(&odentry->d_count) > 1)
                 || (atomic_read(&ndentry->d_count) > 1))) {
-                        err = nfserr_perm;
+                        err = -EPERM;
        } else
 #endif
        err = vfs_rename(fdir, odentry, tdir, ndentry);
        if (!err && EX_ISSYNC(tfhp->fh_export)) {
-                nfsd_sync_dir(tdentry);
+                err = nfsd_sync_dir(tdentry);
-                nfsd_sync_dir(fdentry);
+                if (!err)
+                        err = nfsd_sync_dir(fdentry);
        }
 out_dput_new:
@@ -1673,7 +1667,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 #ifdef MSNFS
                if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
                        (atomic_read(&rdentry->d_count) > 1)) {
-                        err = nfserr_perm;
+                        err = -EPERM;
                } else
 #endif
                err = vfs_unlink(dirp, rdentry);
@@ -1683,17 +1677,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        dput(rdentry);
-        if (err)
+        if (err == 0 &&
-                goto out_nfserr;
+            EX_ISSYNC(fhp->fh_export))
-        if (EX_ISSYNC(fhp->fh_export)) 
+                        err = nfsd_sync_dir(dentry);
-                nfsd_sync_dir(dentry);
-out:
-        return err;
 out_nfserr:
        err = nfserrno(err);
-        goto out;
+out:
+        return err;
 }
 /*
@@ -1884,39 +1875,25 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
        ssize_t size;
        struct posix_acl *acl;
-        if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr)
+        if (!IS_POSIXACL(inode))
+                return ERR_PTR(-EOPNOTSUPP);
+        switch (type) {
+        case ACL_TYPE_ACCESS:
+                name = POSIX_ACL_XATTR_ACCESS;
+                break;
+        case ACL_TYPE_DEFAULT:
+                name = POSIX_ACL_XATTR_DEFAULT;
+                break;
+        default:
                return ERR_PTR(-EOPNOTSUPP);
-        switch(type) {
-                case ACL_TYPE_ACCESS:
-                        name = POSIX_ACL_XATTR_ACCESS;
-                        break;
-                case ACL_TYPE_DEFAULT:
-                        name = POSIX_ACL_XATTR_DEFAULT;
-                        break;
-                default:
-                        return ERR_PTR(-EOPNOTSUPP);
        }
-        size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0);
+        size = nfsd_getxattr(fhp->fh_dentry, name, &value);
+        if (size < 0)
+                return ERR_PTR(size);
-        if (size < 0) {
-                acl = ERR_PTR(size);
-                goto getout;
-        } else if (size > 0) {
-                value = kmalloc(size, GFP_KERNEL);
-                if (!value) {
-                        acl = ERR_PTR(-ENOMEM);
-                        goto getout;
-                }
-                size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size);
-                if (size < 0) {
-                        acl = ERR_PTR(size);
-                        goto getout;
-                }
-        }
        acl = posix_acl_from_xattr(value, size);
-getout:
        kfree(value);
        return acl;
 }
@@ -1957,16 +1934,13 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
        } else
                size = 0;
-        if (!fhp->fh_locked)
-                fh_lock(fhp);  /* unlocking is done automatically */
        if (size)
-                error = inode->i_op->setxattr(fhp->fh_dentry, name,
+                error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
-                                              value, size, 0);
        else {
                if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
                        error = 0;
                else {
-                        error = inode->i_op->removexattr(fhp->fh_dentry, name);
+                        error = vfs_removexattr(fhp->fh_dentry, name);
                        if (error == -ENODATA)
                                error = 0;
                }
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index eda056bac256..9480a0526cd3 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1532,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
 * NOTE to self: No changes in the attribute list are required to move from
 *               a resident to a non-resident attribute.
 *
- * Locking: - The caller must hold i_sem on the inode.
+ * Locking: - The caller must hold i_mutex on the inode.
 */
 int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 {
@@ -1728,7 +1728,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
        /*
         * This needs to be last since the address space operations ->readpage
         * and ->writepage can run concurrently with us as they are not
-         * serialized on i_sem.  Note, we are not allowed to fail once we flip
+         * serialized on i_mutex.  Note, we are not allowed to fail once we flip
         * this switch, which is another reason to do this last.
         */
        NInoSetNonResident(ni);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 795c3d1930f5..b0690d4c8906 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
 * work but we don't care for how quickly one can access them. This also fixes
 * the dcache aliasing issues.
 *
- * Locking:  - Caller must hold i_sem on the directory.
+ * Locking:  - Caller must hold i_mutex on the directory.
 *           - Each page cache page in the index allocation mapping must be
 *             locked whilst being accessed otherwise we may find a corrupt
 *             page due to it being under ->writepage at the moment which
@@ -1085,11 +1085,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
 * While this will return the names in random order this doesn't matter for
 * ->readdir but OTOH results in a faster ->readdir.
 *
- * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS
+ * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
 * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
 * modifications).
 *
- * Locking:  - Caller must hold i_sem on the directory.
+ * Locking:  - Caller must hold i_mutex on the directory.
 *           - Each page cache page in the index allocation mapping must be
 *             locked whilst being accessed otherwise we may find a corrupt
 *             page due to it being under ->writepage at the moment which
@@ -1520,7 +1520,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
 * Note: In the past @filp could be NULL so we ignore it as we don't need it
 * anyway.
 *
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Caller must hold i_mutex on the inode.
 *
 * TODO: We should probably also write all attribute/index inodes associated
 * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 727533891813..fb413d3d8618 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -106,7 +106,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
 * this is the case, the necessary zeroing will also have happened and that all
 * metadata is self-consistent.
 *
- * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be
+ * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be
 *          held by the caller.
 */
 static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
@@ -473,7 +473,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
 * @bytes:      number of bytes to be written
 *
 * This is called for non-resident attributes from ntfs_file_buffered_write()
- * with i_sem held on the inode (@pages[0]->mapping->host).  There are
+ * with i_mutex held on the inode (@pages[0]->mapping->host).  There are
 * @nr_pages pages in @pages which are locked but not kmap()ped.  The source
 * data has not yet been copied into the @pages.
 * 
@@ -1637,7 +1637,7 @@ err_out:
 * @pos:        byte position in file at which the write begins
 * @bytes:      number of bytes to be written
 *
- * This is called from ntfs_file_buffered_write() with i_sem held on the inode
+ * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
 * (@pages[0]->mapping->host).  There are @nr_pages pages in @pages which are
 * locked but not kmap()ped.  The source data has already been copied into the
 * @page.  ntfs_prepare_pages_for_non_resident_write() has been called before
@@ -1814,7 +1814,7 @@ err_out:
 /**
 * ntfs_file_buffered_write -
 *
- * Locking: The vfs is holding ->i_sem on the inode.
+ * Locking: The vfs is holding ->i_mutex on the inode.
 */
 static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
                const struct iovec *iov, unsigned long nr_segs,
@@ -2173,7 +2173,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
        err = remove_suid(file->f_dentry);
        if (err)
                goto out;
-        inode_update_time(inode, 1);
+        file_update_time(file);
        written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
                        count);
 out:
@@ -2196,9 +2196,9 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
        BUG_ON(iocb->ki_pos != pos);
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
                int err = sync_page_range(inode, mapping, pos, ret);
                if (err < 0)
@@ -2221,12 +2221,12 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
        struct kiocb kiocb;
        ssize_t ret;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        init_sync_kiocb(&kiocb, file);
        ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
        if (ret == -EIOCBQUEUED)
                ret = wait_on_sync_kiocb(&kiocb);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
                int err = sync_page_range(inode, mapping, *ppos - ret, ret);
                if (err < 0)
@@ -2269,7 +2269,7 @@ static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
 * Note: In the past @filp could be NULL so we ignore it as we don't need it
 * anyway.
 *
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Caller must hold i_mutex on the inode.
 *
 * TODO: We should probably also write all attribute/index inodes associated
 * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 8f2d5727546f..9f5427c2d105 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -32,7 +32,7 @@
 * Allocate a new index context, initialize it with @idx_ni and return it.
 * Return NULL if allocation failed.
 *
- * Locking:  Caller must hold i_sem on the index inode.
+ * Locking:  Caller must hold i_mutex on the index inode.
 */
 ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 {
@@ -50,7 +50,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 *
 * Release the index context @ictx, releasing all associated resources.
 *
- * Locking:  Caller must hold i_sem on the index inode.
+ * Locking:  Caller must hold i_mutex on the index inode.
 */
 void ntfs_index_ctx_put(ntfs_index_context *ictx)
 {
@@ -106,7 +106,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx)
 * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
 * ensure that the changes are written to disk.
 *
- * Locking:  - Caller must hold i_sem on the index inode.
+ * Locking:  - Caller must hold i_mutex on the index inode.
 *           - Each page cache page in the index allocation mapping must be
 *             locked whilst being accessed otherwise we may find a corrupt
 *             page due to it being under ->writepage at the moment which
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index b24f4c4b2c5c..ea1bd3feea1b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2125,13 +2125,13 @@ void ntfs_put_inode(struct inode *vi)
                ntfs_inode *ni = NTFS_I(vi);
                if (NInoIndexAllocPresent(ni)) {
                        struct inode *bvi = NULL;
-                        down(&vi->i_sem);
+                        mutex_lock(&vi->i_mutex);
                        if (atomic_read(&vi->i_count) == 2) {
                                bvi = ni->itype.index.bmp_ino;
                                if (bvi)
                                        ni->itype.index.bmp_ino = NULL;
                        }
-                        up(&vi->i_sem);
+                        mutex_unlock(&vi->i_mutex);
                        if (bvi)
                                iput(bvi);
                }
@@ -2311,7 +2311,7 @@ static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
 *
 * Returns 0 on success or -errno on error.
 *
- * Called with ->i_sem held.  In all but one case ->i_alloc_sem is held for
+ * Called with ->i_mutex held.  In all but one case ->i_alloc_sem is held for
 * writing.  The only case in the kernel where ->i_alloc_sem is not held is
 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
 * with the current i_size as the offset.  The analogous place in NTFS is in
@@ -2767,7 +2767,25 @@ unm_done:
        up_write(&ni->runlist.lock);
 done:
        /* Update the mtime and ctime on the base inode. */
-        inode_update_time(VFS_I(base_ni), 1);
+        /* Normally ->truncate should not update ctime or mtime, but
+         * ntfs did so before, so it got an open-coded copy of
+         * file_update_time() here.  One day someone should fix
+         * this for real.
+         */
+        if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
+                struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb);
+                int sync_it = 0;
+                if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) ||
+                    !timespec_equal(&VFS_I(base_ni)->i_ctime, &now))
+                        sync_it = 1;
+                VFS_I(base_ni)->i_mtime = now;
+                VFS_I(base_ni)->i_ctime = now;
+                if (sync_it)
+                        mark_inode_dirty_sync(VFS_I(base_ni));
+        }
        if (likely(!err)) {
                NInoClearTruncateFailed(ni);
                ntfs_debug("Done.");
@@ -2831,7 +2849,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
 * We also abort all changes of user, group, and mode as we do not implement
 * the NTFS ACLs yet.
 *
- * Called with ->i_sem held.  For the ATTR_SIZE (i.e. ->truncate) case, also
+ * Called with ->i_mutex held.  For the ATTR_SIZE (i.e. ->truncate) case, also
 * called with ->i_alloc_sem held for writing.
 *
 * Basically this is a copy of generic notify_change() and inode_setattr()
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 351dbc3b6e40..5ea9eb93af62 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -96,7 +96,7 @@
 *    name. We then convert the name to the current NLS code page, and proceed
 *    searching for a dentry with this name, etc, as in case 2), above.
 *
- * Locking: Caller must hold i_sem on the directory.
+ * Locking: Caller must hold i_mutex on the directory.
 */
 static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
                struct nameidata *nd)
@@ -254,7 +254,7 @@ handle_name:
        nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
        /*
-         * Note: No need for dent->d_lock lock as i_sem is held on the
+         * Note: No need for dent->d_lock lock as i_mutex is held on the
         * parent inode.
         */
@@ -374,7 +374,7 @@ struct inode_operations ntfs_dir_inode_ops = {
 * The code is based on the ext3 ->get_parent() implementation found in
 * fs/ext3/namei.c::ext3_get_parent().
 *
- * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down.
+ * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex held.
 *
 * Return the dentry of the parent directory on success or the error code on
 * error (IS_ERR() is true).
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index 833df2a4e9fb..d0ef4182147b 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -48,7 +48,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
                ntfs_error(vol->sb, "Quota inodes are not open.");
                return FALSE;
        }
-        down(&vol->quota_q_ino->i_sem);
+        mutex_lock(&vol->quota_q_ino->i_mutex);
        ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
        if (!ictx) {
                ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
        ntfs_index_entry_mark_dirty(ictx);
 set_done:
        ntfs_index_ctx_put(ictx);
-        up(&vol->quota_q_ino->i_sem);
+        mutex_unlock(&vol->quota_q_ino->i_mutex);
        /*
         * We set the flag so we do not try to mark the quotas out of date
         * again on remount.
@@ -110,7 +110,7 @@ done:
 err_out:
        if (ictx)
                ntfs_index_ctx_put(ictx);
-        up(&vol->quota_q_ino->i_sem);
+        mutex_unlock(&vol->quota_q_ino->i_mutex);
        return FALSE;
 }
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6c16db9e1a8a..c3a3f1a8310b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -443,8 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
        ntfs_debug("Entering with remount options string: %s", opt);
 #ifndef NTFS_RW
-        /* For read-only compiled driver, enforce all read-only flags. */
+        /* For read-only compiled driver, enforce read-only flag. */
-        *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+        *flags |= MS_RDONLY;
 #else /* NTFS_RW */
        /*
         * For the read-write compiled driver, if we are remounting read-write,
@@ -1213,10 +1213,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
         * Find the inode number for the hibernation file by looking up the
         * filename hiberfil.sys in the root directory.
         */
-        down(&vol->root_ino->i_sem);
+        mutex_lock(&vol->root_ino->i_mutex);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
                        &name);
-        up(&vol->root_ino->i_sem);
+        mutex_unlock(&vol->root_ino->i_mutex);
        if (IS_ERR_MREF(mref)) {
                ret = MREF_ERR(mref);
                /* If the file does not exist, Windows is not hibernated. */
@@ -1307,10 +1307,10 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
         * Find the inode number for the quota file by looking up the filename
         * $Quota in the extended system files directory $Extend.
         */
-        down(&vol->extend_ino->i_sem);
+        mutex_lock(&vol->extend_ino->i_mutex);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
                        &name);
-        up(&vol->extend_ino->i_sem);
+        mutex_unlock(&vol->extend_ino->i_mutex);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, quotas are disabled and have
@@ -1390,10 +1390,10 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
         * Find the inode number for the transaction log file by looking up the
         * filename $UsnJrnl in the extended system files directory $Extend.
         */
-        down(&vol->extend_ino->i_sem);
+        mutex_lock(&vol->extend_ino->i_mutex);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
                        &name);
-        up(&vol->extend_ino->i_sem);
+        mutex_unlock(&vol->extend_ino->i_mutex);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, transaction logging is disabled,
@@ -1721,7 +1721,7 @@ static BOOL load_system_files(ntfs_volume *vol)
                                                es3);
                                goto iput_mirr_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s",
                                        !vol->mftmirr_ino ? es1 : es2, es3);
                } else
@@ -1837,7 +1837,7 @@ get_ctx_vol_failed:
                                                es1, es2);
                                goto iput_vol_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
                } else
                        ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1874,7 +1874,7 @@ get_ctx_vol_failed:
                                }
                                goto iput_logfile_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
                } else
                        ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1919,7 +1919,7 @@ get_ctx_vol_failed:
                                                es1, es2);
                                goto iput_root_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
                } else
                        ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -1943,7 +1943,7 @@ get_ctx_vol_failed:
                        goto iput_root_err_out;
                }
                ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-                sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                sb->s_flags |= MS_RDONLY;
                /*
                 * Do not set NVolErrors() because ntfs_remount() might manage
                 * to set the dirty flag in which case all would be well.
@@ -1970,7 +1970,7 @@ get_ctx_vol_failed:
                        goto iput_root_err_out;
                }
                ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-                sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                sb->s_flags |= MS_RDONLY;
                NVolSetErrors(vol);
        }
 #endif
@@ -1989,7 +1989,7 @@ get_ctx_vol_failed:
                        goto iput_root_err_out;
                }
                ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-                sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                sb->s_flags |= MS_RDONLY;
                NVolSetErrors(vol);
        }
 #endif /* NTFS_RW */
@@ -2030,7 +2030,7 @@ get_ctx_vol_failed:
                                                es1, es2);
                                goto iput_quota_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
                } else
                        ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -2053,7 +2053,7 @@ get_ctx_vol_failed:
                        goto iput_quota_err_out;
                }
                ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-                sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                sb->s_flags |= MS_RDONLY;
                NVolSetErrors(vol);
        }
        /*
@@ -2074,7 +2074,7 @@ get_ctx_vol_failed:
                                                es1, es2);
                                goto iput_usnjrnl_err_out;
                        }
-                        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                        sb->s_flags |= MS_RDONLY;
                        ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
                } else
                        ntfs_warning(sb, "%s.  Will not be able to remount "
@@ -2097,7 +2097,7 @@ get_ctx_vol_failed:
                        goto iput_usnjrnl_err_out;
                }
                ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-                sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+                sb->s_flags |= MS_RDONLY;
                NVolSetErrors(vol);
        }
 #endif /* NTFS_RW */
@@ -2312,9 +2312,9 @@ static void ntfs_put_super(struct super_block *sb)
        if (!list_empty(&sb->s_dirty)) {
                const char *s1, *s2;
-                down(&vol->mft_ino->i_sem);
+                mutex_lock(&vol->mft_ino->i_mutex);
                truncate_inode_pages(vol->mft_ino->i_mapping, 0);
-                up(&vol->mft_ino->i_sem);
+                mutex_unlock(&vol->mft_ino->i_mutex);
                write_inode_now(vol->mft_ino, 1);
                if (!list_empty(&sb->s_dirty)) {
                        static const char *_s1 = "inodes";
@@ -2689,7 +2689,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
        ntfs_debug("Entering.");
 #ifndef NTFS_RW
-        sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+        sb->s_flags |= MS_RDONLY;
 #endif /* ! NTFS_RW */
        /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
        sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 465f797451ee..6b9812db3779 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -966,7 +966,7 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
        mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
                   num_clusters);
-        BUG_ON(!down_trylock(&tl_inode->i_sem));
+        BUG_ON(mutex_trylock(&tl_inode->i_mutex));
        start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
@@ -1108,7 +1108,7 @@ bail:
        return status;
 }
-/* Expects you to already be holding tl_inode->i_sem */
+/* Expects you to already be holding tl_inode->i_mutex */
 static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
        int status;
@@ -1123,7 +1123,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        mlog_entry_void();
-        BUG_ON(!down_trylock(&tl_inode->i_sem));
+        BUG_ON(mutex_trylock(&tl_inode->i_mutex));
        di = (struct ocfs2_dinode *) tl_bh->b_data;
        tl = &di->id2.i_dealloc;
@@ -1198,9 +1198,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        int status;
        struct inode *tl_inode = osb->osb_tl_inode;
-        down(&tl_inode->i_sem);
+        mutex_lock(&tl_inode->i_mutex);
        status = __ocfs2_flush_truncate_log(osb);
-        up(&tl_inode->i_sem);
+        mutex_unlock(&tl_inode->i_mutex);
        return status;
 }
@@ -1363,7 +1363,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
        mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
             tl_copy->i_blkno);
-        down(&tl_inode->i_sem);
+        mutex_lock(&tl_inode->i_mutex);
        for(i = 0; i < num_recs; i++) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
                        status = __ocfs2_flush_truncate_log(osb);
@@ -1395,7 +1395,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
        }
 bail_up:
-        up(&tl_inode->i_sem);
+        mutex_unlock(&tl_inode->i_mutex);
        mlog_exit(status);
        return status;
@@ -1840,7 +1840,7 @@ start:
        mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
-        down(&tl_inode->i_sem);
+        mutex_lock(&tl_inode->i_mutex);
        tl_sem = 1;
        /* ocfs2_truncate_log_needs_flush guarantees us at least one
         * record is free for use. If there isn't any, we flush to get
@@ -1875,7 +1875,7 @@ start:
                goto bail;
        }
-        up(&tl_inode->i_sem);
+        mutex_unlock(&tl_inode->i_mutex);
        tl_sem = 0;
        ocfs2_commit_trans(handle);
@@ -1890,7 +1890,7 @@ bail:
        ocfs2_schedule_truncate_log_flush(osb, 1);
        if (tl_sem)
-                up(&tl_inode->i_sem);
+                mutex_unlock(&tl_inode->i_mutex);
        if (handle)
                ocfs2_commit_trans(handle);
@@ -1994,7 +1994,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
                        goto bail;
                }
-                down(&ext_alloc_inode->i_sem);
+                mutex_lock(&ext_alloc_inode->i_mutex);
                (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
                status = ocfs2_meta_lock(ext_alloc_inode,
@@ -2026,7 +2026,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
                if (tc->tc_ext_alloc_locked)
                        ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
-                up(&tc->tc_ext_alloc_inode->i_sem);
+                mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
                iput(tc->tc_ext_alloc_inode);
        }
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index f5ef5ea61a05..e8c56a3d9c64 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
        mlog(ML_ENTRY, "ENTRY:\n");                                     \
 } while (0)
-/* We disable this for old compilers since they don't have support for
+/*
- * __builtin_types_compatible_p.
+ * We disable this for sparse.
 */
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \
+#if !defined(__CHECKER__)
-    !defined(__CHECKER__)
 #define mlog_exit(st) do {                                                   \
        if (__builtin_types_compatible_p(typeof(st), unsigned long))         \
                mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st));          \
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 5fd60c105913..cf7828f23361 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -653,7 +653,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
        struct config_group *o2hb_group = NULL, *ret = NULL;
        void *defs = NULL;
-        /* this runs under the parent dir's i_sem; there can be only
+        /* this runs under the parent dir's i_mutex; there can be only
         * one caller in here at a time */
        if (o2nm_single_cluster)
                goto out; /* ENOSPC */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 856e20ae8263..57158fa75d91 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -202,7 +202,7 @@ bail:
 }
 /*
- * NOTE: this should always be called with parent dir i_sem taken.
+ * NOTE: this should always be called with parent dir i_mutex taken.
 */
 int ocfs2_find_files_on_disk(const char *name,
                             int namelen,
@@ -245,7 +245,7 @@ leave:
 * Return 0 if the name does not exist
 * Return -EEXIST if the directory contains the name
 *
- * Callers should have i_sem + a cluster lock on dir
+ * Callers should have i_mutex + a cluster lock on dir
 */
 int ocfs2_check_dir_for_entry(struct inode *dir,
                              const char *name,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 72ae9e3306f4..eaf33caa0a1f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -23,6 +23,7 @@
 * Boston, MA 021110-1307, USA.
 */
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -492,7 +493,7 @@ restart_all:
        }
        /* blocks peope in read/write from reading our allocation
-         * until we're done changing it. We depend on i_sem to block
+         * until we're done changing it. We depend on i_mutex to block
         * other extend/truncate calls while we're here. Ordering wrt
         * start_trans is important here -- always do it before! */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -958,8 +959,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
                filp->f_flags &= ~O_DIRECT;
 #endif
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
-        /* to match setattr's i_sem -> i_alloc_sem -> rw_lock ordering */
+        /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
        if (filp->f_flags & O_DIRECT) {
                have_alloc_sem = 1;
                down_read(&inode->i_alloc_sem);
@@ -1123,7 +1124,7 @@ out:
                up_read(&inode->i_alloc_sem);
        if (rw_level != -1) 
                ocfs2_rw_unlock(inode, rw_level);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        mlog_exit(ret);
        return ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index a91ba4dec936..d4ecc0627716 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -485,10 +485,10 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail;
        }
-        down(&inode_alloc_inode->i_sem);
+        mutex_lock(&inode_alloc_inode->i_mutex);
        status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
        if (status < 0) {
-                up(&inode_alloc_inode->i_sem);
+                mutex_unlock(&inode_alloc_inode->i_mutex);
                mlog_errno(status);
                goto bail;
@@ -536,7 +536,7 @@ bail_commit:
        ocfs2_commit_trans(handle);
 bail_unlock:
        ocfs2_meta_unlock(inode_alloc_inode, 1);
-        up(&inode_alloc_inode->i_sem);
+        mutex_unlock(&inode_alloc_inode->i_mutex);
        brelse(inode_alloc_bh);
 bail:
        iput(inode_alloc_inode);
@@ -567,10 +567,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
        /* Lock the orphan dir. The lock will be held for the entire
         * delete_inode operation. We do this now to avoid races with
         * recovery completion on other nodes. */
-        down(&orphan_dir_inode->i_sem);
+        mutex_lock(&orphan_dir_inode->i_mutex);
        status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
        if (status < 0) {
-                up(&orphan_dir_inode->i_sem);
+                mutex_unlock(&orphan_dir_inode->i_mutex);
                mlog_errno(status);
                goto bail;
@@ -593,7 +593,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 bail_unlock_dir:
        ocfs2_meta_unlock(orphan_dir_inode, 1);
-        up(&orphan_dir_inode->i_sem);
+        mutex_unlock(&orphan_dir_inode->i_mutex);
        brelse(orphan_dir_bh);
 bail:
        iput(orphan_dir_inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 04428042e5e5..303c8d96457f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -216,7 +216,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
        atomic_inc(&inode->i_count);
        /* we're obviously changing it... */
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        /* sanity check */
        BUG_ON(OCFS2_I(inode)->ip_handle);
@@ -241,7 +241,7 @@ static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
                OCFS2_I(inode)->ip_handle = NULL;
                list_del_init(&OCFS2_I(inode)->ip_handle_list);
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
                iput(inode);
        }
 }
@@ -1433,10 +1433,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                goto out;
        }
-        down(&orphan_dir_inode->i_sem);
+        mutex_lock(&orphan_dir_inode->i_mutex);
        status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
        if (status < 0) {
-                up(&orphan_dir_inode->i_sem);
+                mutex_unlock(&orphan_dir_inode->i_mutex);
                mlog_errno(status);
                goto out;
        }
@@ -1451,7 +1451,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                if (!bh)
                        status = -EINVAL;
                if (status < 0) {
-                        up(&orphan_dir_inode->i_sem);
+                        mutex_unlock(&orphan_dir_inode->i_mutex);
                        if (bh)
                                brelse(bh);
                        mlog_errno(status);
@@ -1465,7 +1465,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                        if (!ocfs2_check_dir_entry(orphan_dir_inode,
                                                  de, bh, local)) {
-                                up(&orphan_dir_inode->i_sem);
+                                mutex_unlock(&orphan_dir_inode->i_mutex);
                                status = -EINVAL;
                                mlog_errno(status);
                                brelse(bh);
@@ -1509,7 +1509,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                }
                brelse(bh);
        }
-        up(&orphan_dir_inode->i_sem);
+        mutex_unlock(&orphan_dir_inode->i_mutex);
        ocfs2_meta_unlock(orphan_dir_inode, 0);
        have_disk_lock = 0;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index fe373a2101d9..149b35181666 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -334,7 +334,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
                goto bail;
        }
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
                                  &alloc_bh, 0, inode);
@@ -367,7 +367,7 @@ bail:
                brelse(alloc_bh);
        if (inode) {
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
                iput(inode);
        }
@@ -446,7 +446,7 @@ bail:
 /*
 * make sure we've got at least bitswanted contiguous bits in the
- * local alloc. You lose them when you drop i_sem.
+ * local alloc. You lose them when you drop i_mutex.
 *
 * We will add ourselves to the transaction passed in, but may start
 * our own in order to shift windows.
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index afdeec4b0eef..843cf9ddefe8 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -80,12 +80,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
        .nopage = ocfs2_nopage,
 };
-int ocfs2_mmap(struct file *file,
+int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
-               struct vm_area_struct *vma)
 {
-        struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
-        struct inode *inode = mapping->host;
        /* We don't want to support shared writable mappings yet. */
        if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
            && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
@@ -95,7 +91,7 @@ int ocfs2_mmap(struct file *file,
                return -EINVAL;
        }
-        update_atime(inode);
+        file_accessed(file);
        vma->vm_ops = &ocfs2_file_vm_ops;
        return 0;
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 48bf7f0ce544..364d64bd5f10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -169,7 +169,7 @@ static match_table_t tokens = {
 */
 static void ocfs2_write_super(struct super_block *sb)
 {
-        if (down_trylock(&sb->s_lock) == 0)
+        if (mutex_trylock(&sb->s_lock) != 0)
                BUG();
        sb->s_dirt = 0;
 }
diff --git a/fs/open.c b/fs/open.c
index f53a5b9ffb7d..70e0230d8e77 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -16,9 +16,11 @@
 #include <linux/tty.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
+#include <linux/capability.h>
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
+#include <linux/fcntl.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
 #include <linux/personality.h>
@@ -194,7 +196,8 @@ out:
        return error;
 }
-int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+        struct file *filp)
 {
        int err;
        struct iattr newattrs;
@@ -204,19 +207,19 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
                return -EINVAL;
        newattrs.ia_size = length;
-        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+        newattrs.ia_valid = ATTR_SIZE | time_attrs;
        if (filp) {
                newattrs.ia_file = filp;
                newattrs.ia_valid |= ATTR_FILE;
        }
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        err = notify_change(dentry, &newattrs);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return err;
 }
-static inline long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user * path, loff_t length)
 {
        struct nameidata nd;
        struct inode * inode;
@@ -266,7 +269,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
-                error = do_truncate(nd.dentry, length, NULL);
+                error = do_truncate(nd.dentry, length, 0, NULL);
        }
        put_write_access(inode);
@@ -282,7 +285,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length)
        return do_sys_truncate(path, (long)length);
 }
-static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
        struct inode * inode;
        struct dentry *dentry;
@@ -318,7 +321,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
        error = locks_verify_truncate(inode, file, length);
        if (!error)
-                error = do_truncate(dentry, length, file);
+                error = do_truncate(dentry, length, 0, file);
 out_putf:
        fput(file);
 out:
@@ -381,7 +384,7 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
                error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
                newattrs.ia_atime.tv_nsec = 0;
-                if (!error) 
+                if (!error)
                        error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
                newattrs.ia_mtime.tv_nsec = 0;
                if (error)
@@ -397,9 +400,9 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
                    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
                        goto dput_and_out;
        }
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        error = notify_change(nd.dentry, &newattrs);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 dput_and_out:
        path_release(&nd);
 out:
@@ -412,14 +415,14 @@ out:
 * must be owner or have write permission.
 * Else, update from *times, must be owner or super user.
 */
-long do_utimes(char __user * filename, struct timeval * times)
+long do_utimes(int dfd, char __user *filename, struct timeval *times)
 {
        int error;
        struct nameidata nd;
        struct inode * inode;
        struct iattr newattrs;
-        error = user_path_walk(filename, &nd);
+        error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
        if (error)
                goto out;
@@ -450,22 +453,27 @@ long do_utimes(char __user * filename, struct timeval * times)
                    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
                        goto dput_and_out;
        }
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        error = notify_change(nd.dentry, &newattrs);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 dput_and_out:
        path_release(&nd);
 out:
        return error;
 }
-asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
+asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes)
 {
        struct timeval times[2];
        if (utimes && copy_from_user(&times, utimes, sizeof(times)))
                return -EFAULT;
-        return do_utimes(filename, utimes ? times : NULL);
+        return do_utimes(dfd, filename, utimes ? times : NULL);
+}
+/*
+ * utimes() entry point: forwards to sys_futimesat() with AT_FDCWD as the
+ * directory fd, passing filename and utimes through unchanged.
+ */
+asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
+{
+        return sys_futimesat(AT_FDCWD, filename, utimes);
 }
@@ -474,7 +482,7 @@ asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utime
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
-asmlinkage long sys_access(const char __user * filename, int mode)
+asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
        struct nameidata nd;
        int old_fsuid, old_fsgid;
@@ -504,7 +512,7 @@ asmlinkage long sys_access(const char __user * filename, int mode)
        else
                current->cap_effective = current->cap_permitted;
-        res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+        res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
        if (!res) {
                res = vfs_permission(&nd, mode);
                /* SuS v2 requires we report a read only fs too */
@@ -521,6 +529,11 @@ asmlinkage long sys_access(const char __user * filename, int mode)
        return res;
 }
+/*
+ * access() entry point: forwards to sys_faccessat() with AT_FDCWD as the
+ * directory fd, passing filename and mode through unchanged.
+ */
+asmlinkage long sys_access(const char __user *filename, int mode)
+{
+        return sys_faccessat(AT_FDCWD, filename, mode);
+}
 asmlinkage long sys_chdir(const char __user * filename)
 {
        struct nameidata nd;
@@ -619,13 +632,13 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        err = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out_putf;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
                mode = inode->i_mode;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        err = notify_change(dentry, &newattrs);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 out_putf:
        fput(file);
@@ -633,14 +646,15 @@ out:
        return err;
 }
-asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
+                             mode_t mode)
 {
        struct nameidata nd;
        struct inode * inode;
        int error;
        struct iattr newattrs;
-        error = user_path_walk(filename, &nd);
+        error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
@@ -653,13 +667,13 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto dput_and_out;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
                mode = inode->i_mode;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        error = notify_change(nd.dentry, &newattrs);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 dput_and_out:
        path_release(&nd);
@@ -667,6 +681,11 @@ out:
        return error;
 }
+/*
+ * chmod() entry point: forwards to sys_fchmodat() with AT_FDCWD as the
+ * directory fd, passing filename and mode through unchanged.
+ */
+asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
+{
+        return sys_fchmodat(AT_FDCWD, filename, mode);
+}
 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 {
        struct inode * inode;
@@ -695,9 +714,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
        }
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, &newattrs);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 out:
        return error;
 }
@@ -715,6 +734,26 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
        return error;
 }
+/*
+ * sys_fchownat - change owner/group of a path looked up against dfd.
+ *
+ * dfd:      directory fd handed to __user_walk_fd() for the lookup
+ * filename: user-space path name
+ * user:     uid passed on to chown_common()
+ * group:    gid passed on to chown_common()
+ * flag:     0 or AT_SYMLINK_NOFOLLOW; any other bit yields -EINVAL
+ *
+ * Returns -EINVAL for an unsupported flag, otherwise the result of the
+ * path walk or, if the walk succeeded, of chown_common().
+ */
+asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
+                             gid_t group, int flag)
+{
+        struct nameidata nd;
+        int error = -EINVAL;
+        int follow;
+        /* Reject every flag bit except AT_SYMLINK_NOFOLLOW. */
+        if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+                goto out;
+        /* A final symlink is followed unless the caller asked not to. */
+        follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+        error = __user_walk_fd(dfd, filename, follow, &nd);
+        if (!error) {
+                error = chown_common(nd.dentry, user, group);
+                /* nd is released only when the walk succeeded. */
+                path_release(&nd);
+        }
+out:
+        return error;
+}
 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 {
        struct nameidata nd;
@@ -818,7 +857,8 @@ cleanup_file:
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 */
-struct file *filp_open(const char * filename, int flags, int mode)
+static struct file *do_filp_open(int dfd, const char *filename, int flags,
+                                 int mode)
 {
        int namei_flags, error;
        struct nameidata nd;
@@ -827,12 +867,17 @@ struct file *filp_open(const char * filename, int flags, int mode)
        if ((namei_flags+1) & O_ACCMODE)
                namei_flags++;
-        error = open_namei(filename, namei_flags, mode, &nd);
+        error = open_namei(dfd, filename, namei_flags, mode, &nd);
        if (!error)
                return nameidata_to_filp(&nd, flags);
        return ERR_PTR(error);
 }
+/*
+ * filp_open - open filename through do_filp_open() with AT_FDCWD as the
+ * directory fd.  Returns whatever do_filp_open() returns: a struct file
+ * pointer, or an ERR_PTR() value when open_namei() fails.
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+        return do_filp_open(AT_FDCWD, filename, flags, mode);
+}
 EXPORT_SYMBOL(filp_open);
 /**
@@ -970,7 +1015,7 @@ out:
 EXPORT_SYMBOL(get_unused_fd);
-static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
        struct fdtable *fdt = files_fdtable(files);
        __FD_CLR(fd, fdt->open_fds);
@@ -989,7 +1034,7 @@ void fastcall put_unused_fd(unsigned int fd)
 EXPORT_SYMBOL(put_unused_fd);
 /*
- * Install a file pointer in the fd array.  
+ * Install a file pointer in the fd array.
 *
 * The VFS is full of places where we drop the files lock between
 * setting the open_fds bitmap and installing the file in the file
@@ -1014,7 +1059,7 @@ void fastcall fd_install(unsigned int fd, struct file * file)
 EXPORT_SYMBOL(fd_install);
-long do_sys_open(const char __user *filename, int flags, int mode)
+long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 {
        char *tmp = getname(filename);
        int fd = PTR_ERR(tmp);
@@ -1022,7 +1067,7 @@ long do_sys_open(const char __user *filename, int flags, int mode)
        if (!IS_ERR(tmp)) {
                fd = get_unused_fd();
                if (fd >= 0) {
-                        struct file *f = filp_open(tmp, flags, mode);
+                        struct file *f = do_filp_open(dfd, tmp, flags, mode);
                        if (IS_ERR(f)) {
                                put_unused_fd(fd);
                                fd = PTR_ERR(f);
@@ -1041,10 +1086,20 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode)
        if (force_o_largefile())
                flags |= O_LARGEFILE;
-        return do_sys_open(filename, flags, mode);
+        return do_sys_open(AT_FDCWD, filename, flags, mode);
 }
 EXPORT_SYMBOL_GPL(sys_open);
+/*
+ * openat() entry point: same as sys_open() except that the caller-supplied
+ * dfd, rather than AT_FDCWD, is handed to do_sys_open().
+ */
+asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
+                           int mode)
+{
+        /* Add O_LARGEFILE whenever force_o_largefile() says so. */
+        if (force_o_largefile())
+                flags |= O_LARGEFILE;
+        return do_sys_open(dfd, filename, flags, mode);
+}
+EXPORT_SYMBOL_GPL(sys_openat);
 #ifndef __alpha__
 /*
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index e227a04261ab..c9a478099281 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -21,26 +21,30 @@ config ACORN_PARTITION
          Support hard disks partitioned under Acorn operating systems.
 config ACORN_PARTITION_CUMANA
-        bool "Cumana partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "Cumana partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
        help
          Say Y here if you would like to use hard disks under Linux which
          were partitioned using the Cumana interface on Acorn machines.
 config ACORN_PARTITION_EESOX
-        bool "EESOX partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "EESOX partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
 config ACORN_PARTITION_ICS
-        bool "ICS partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "ICS partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
        help
          Say Y here if you would like to use hard disks under Linux which
          were partitioned using the ICS interface on Acorn machines.
 config ACORN_PARTITION_ADFS
-        bool "Native filecore partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "Native filecore partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
        help
          The Acorn Disc Filing System is the standard file system of the
          RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -48,15 +52,17 @@ config ACORN_PARTITION_ADFS
          `Y' here, Linux will support disk partitions created under ADFS.
 config ACORN_PARTITION_POWERTEC
-        bool "PowerTec partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "PowerTec partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
        help
          Support reading partition tables created on Acorn machines using
          the PowerTec SCSI drive.
 config ACORN_PARTITION_RISCIX
-        bool "RISCiX partition support" if PARTITION_ADVANCED && ACORN_PARTITION
+        bool "RISCiX partition support" if PARTITION_ADVANCED
        default y if ARCH_ACORN
+        depends on ACORN_PARTITION
        help
          Once upon a time, there was a native Unix port for the Acorn series
          of machines called RISCiX.  If you say 'Y' here, Linux will be able
@@ -216,6 +222,13 @@ config SUN_PARTITION
          given by the tar program ("man tar" or preferably "info tar"). If
          you don't know what all this is about, say N.
+config KARMA_PARTITION
+        bool "Karma Partition support"
+        depends on PARTITION_ADVANCED
+        help
+          Say Y here if you would like to mount the Rio Karma MP3 player, as it
+          uses a proprietary partition table.
 config EFI_PARTITION
        bool "EFI GUID Partition support"
        depends on PARTITION_ADVANCED
@@ -224,5 +237,3 @@ config EFI_PARTITION
          Say Y here if you would like to use hard disks under Linux which
          were partitioned using EFI GPT.  Presently only useful on the
          IA-64 platform.
-#      define_bool CONFIG_ACORN_PARTITION_CUMANA y
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 66d5cc26fafb..42c7d3878ed0 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o
 obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
 obj-$(CONFIG_IBM_PARTITION) += ibm.o
 obj-$(CONFIG_EFI_PARTITION) += efi.o
+obj-$(CONFIG_KARMA_PARTITION) += karma.o
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7881ce05daef..f924f459bdb8 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -35,6 +35,7 @@
 #include "ibm.h"
 #include "ultrix.h"
 #include "efi.h"
+#include "karma.h"
 #ifdef CONFIG_BLK_DEV_MD
 extern void md_autodetect_dev(dev_t dev);
@@ -103,6 +104,9 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
 #ifdef CONFIG_IBM_PARTITION
        ibm_partition,
 #endif
+#ifdef CONFIG_KARMA_PARTITION
+        karma_partition,
+#endif
        NULL
 };
 
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
new file mode 100644
index 000000000000..176d89bcf123
--- /dev/null
+++ b/fs/partitions/karma.c
@@ -0,0 +1,57 @@
+/*
+ *  fs/partitions/karma.c
+ *  Rio Karma partition info.
+ *
+ *  Copyright (C) 2006 Bob Copeland (me@bobcopeland.com)
+ *  based on osf.c
+ */
+#include "check.h"
+#include "karma.h"
+/*
+ * Probe sector 0 of @bdev for a Rio Karma disklabel and register the
+ * partitions it describes in @state.
+ *
+ * Returns -1 if sector 0 cannot be read, 0 if the label magic does not
+ * match, and 1 once a matching label has been processed.
+ */
+int karma_partition(struct parsed_partitions *state, struct block_device *bdev)
+{
+        /*
+         * Packed on-disk label: reserved bytes, two 16-byte partition
+         * descriptors, padding and a trailing little-endian magic
+         * (270 + 2 * 16 + 208 + 2 = 512 bytes).
+         */
+        struct disklabel {
+                u8 d_reserved[270];
+                struct d_partition {
+                        __le32 p_res;
+                        u8 p_fstype;
+                        u8 p_res2[3];
+                        __le32 p_offset;
+                        __le32 p_size;
+                } d_partitions[2];
+                u8 d_blank[208];
+                __le16 d_magic;
+        } __attribute__((packed)) *label;
+        struct d_partition *entry;
+        Sector sect;
+        int idx;
+        int slot;
+        int found = 0;
+        label = (struct disklabel *)read_dev_sector(bdev, 0, &sect);
+        if (!label)
+                return -1;
+        if (le16_to_cpu(label->d_magic) == KARMA_LABEL_MAGIC) {
+                entry = label->d_partitions;
+                /* Slots start at 1; stop early once state->limit is reached. */
+                for (idx = 0, slot = 1; idx < 2 && slot != state->limit;
+                     idx++, slot++, entry++) {
+                        /* Only non-empty descriptors of type 0x4d are registered. */
+                        if (entry->p_fstype != 0x4d || !le32_to_cpu(entry->p_size))
+                                continue;
+                        put_partition(state, slot, le32_to_cpu(entry->p_offset),
+                                le32_to_cpu(entry->p_size));
+                }
+                printk("\n");
+                found = 1;
+        }
+        put_dev_sector(sect);
+        return found;
+}
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
new file mode 100644
index 000000000000..ecf7d3f2a3d8
--- /dev/null
+++ b/fs/partitions/karma.h
@@ -0,0 +1,8 @@
+/*
+ *  fs/partitions/karma.h
+ */
+/* Value karma_partition() expects in the little-endian d_magic field of the sector-0 label. */
+#define KARMA_LABEL_MAGIC               0xAB56
+/* Partition probe; listed in check_part[] when CONFIG_KARMA_PARTITION is set. */
+int karma_partition(struct parsed_partitions *state, struct block_device *bdev);
diff --git a/fs/pipe.c b/fs/pipe.c
index 66aa0b938d6a..d722579df79a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,13 +44,13 @@ void pipe_wait(struct inode * inode)
         * is considered a noninteractive wait:
         */
        prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        schedule();
        finish_wait(PIPE_WAIT(*inode), &wait);
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
 }
-static inline int
+static int
 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 {
        unsigned long copy;
@@ -70,7 +70,7 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
        return 0;
 }
-static inline int
+static int
 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 {
        unsigned long copy;
@@ -136,7 +136,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
        do_wakeup = 0;
        ret = 0;
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        info = inode->i_pipe;
        for (;;) {
                int bufs = info->nrbufs;
@@ -200,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
                }
                pipe_wait(inode);
        }
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        /* Signal writers asynchronously that there is more room.  */
        if (do_wakeup) {
                wake_up_interruptible(PIPE_WAIT(*inode));
@@ -237,7 +237,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
        do_wakeup = 0;
        ret = 0;
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        info = inode->i_pipe;
        if (!PIPE_READERS(*inode)) {
@@ -341,13 +341,13 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
                PIPE_WAITING_WRITERS(*inode)--;
        }
 out:
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        if (do_wakeup) {
                wake_up_interruptible(PIPE_WAIT(*inode));
                kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
        }
        if (ret > 0)
-                inode_update_time(inode, 1);    /* mtime and ctime */
+                file_update_time(filp);
        return ret;
 }
@@ -381,7 +381,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
        switch (cmd) {
                case FIONREAD:
-                        down(PIPE_SEM(*inode));
+                        mutex_lock(PIPE_MUTEX(*inode));
                        info =  inode->i_pipe;
                        count = 0;
                        buf = info->curbuf;
@@ -390,7 +390,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
                                count += info->bufs[buf].len;
                                buf = (buf+1) & (PIPE_BUFFERS-1);
                        }
-                        up(PIPE_SEM(*inode));
+                        mutex_unlock(PIPE_MUTEX(*inode));
                        return put_user(count, (int __user *)arg);
                default:
                        return -EINVAL;
@@ -433,7 +433,7 @@ pipe_poll(struct file *filp, poll_table *wait)
 static int
 pipe_release(struct inode *inode, int decr, int decw)
 {
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_READERS(*inode) -= decr;
        PIPE_WRITERS(*inode) -= decw;
        if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
@@ -443,7 +443,7 @@ pipe_release(struct inode *inode, int decr, int decw)
                kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
                kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
        }
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        return 0;
 }
@@ -454,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        if (retval < 0)
                return retval;
@@ -471,9 +471,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        if (retval < 0)
                return retval;
@@ -488,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
        struct inode *inode = filp->f_dentry->d_inode;
        int retval;
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
        if (retval >= 0)
                retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        if (retval < 0)
                return retval;
@@ -534,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
 {
        /* We could have perhaps used atomic_t, but this and friends
           below are the only places.  So it doesn't seem worthwhile.  */
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_READERS(*inode)++;
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        return 0;
 }
@@ -544,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
 static int
 pipe_write_open(struct inode *inode, struct file *filp)
 {
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        PIPE_WRITERS(*inode)++;
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        return 0;
 }
@@ -554,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
 static int
 pipe_rdwr_open(struct inode *inode, struct file *filp)
 {
-        down(PIPE_SEM(*inode));
+        mutex_lock(PIPE_MUTEX(*inode));
        if (filp->f_mode & FMODE_READ)
                PIPE_READERS(*inode)++;
        if (filp->f_mode & FMODE_WRITE)
                PIPE_WRITERS(*inode)++;
-        up(PIPE_SEM(*inode));
+        mutex_unlock(PIPE_MUTEX(*inode));
        return 0;
 }
diff --git a/fs/pnode.c b/fs/pnode.c
index aeeec8ba8dd2..f1871f773f64 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
                struct vfsmount *next;
                struct vfsmount *master = m->mnt_master;
-                if ( master == origin->mnt_master ) {
+                if (master == origin->mnt_master) {
                        next = next_peer(m);
                        return ((next == origin) ? NULL : next);
                } else if (m->mnt_slave.next != &master->mnt_slave_list)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e9251f65317..7eb1bd7f800c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -330,7 +330,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        unsigned long  min_flt = 0,  maj_flt = 0;
        cputime_t cutime, cstime, utime, stime;
        unsigned long rsslim = 0;
-        unsigned long it_real_value = 0;
+        DEFINE_KTIME(it_real_value);
        struct task_struct *t;
        char tcomm[sizeof(task->comm)];
@@ -386,7 +386,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        utime = cputime_add(utime, task->signal->utime);
                        stime = cputime_add(stime, task->signal->stime);
                }
-                it_real_value = task->signal->it_real_value;
+                it_real_value = task->signal->real_timer.expires;
        }
        ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
        read_unlock(&tasklist_lock);
@@ -435,7 +435,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                priority,
                nice,
                num_threads,
-                jiffies_to_clock_t(it_real_value),
+                (long) ktime_to_clock_t(it_real_value),
                start_time,
                vsize,
                mm ? get_mm_rss(mm) : 0,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 634355e16986..20feb7568deb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -55,6 +55,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/init.h>
+#include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/string.h>
 #include <linux/seq_file.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 72b431d0a0a4..20e5c4509a43 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -21,6 +21,8 @@
 #include <linux/bitops.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 static ssize_t proc_file_read(struct file *file, char __user *buf,
                              size_t nbytes, loff_t *ppos);
 static ssize_t proc_file_write(struct file *file, const char __user *buffer,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e6a818a93f3d..6573f31f1fd9 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -19,7 +19,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
-extern void free_proc_entry(struct proc_dir_entry *);
+#include "internal.h"
 static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
 {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e55198f9806..95a1cf32b838 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
+void free_proc_entry(struct proc_dir_entry *de);
+int proc_init_inodecache(void);
 static inline struct task_struct *proc_task(struct inode *inode)
 {
        return PROC_I(inode)->task;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1c7da988fcc3..adc2cd95169a 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -14,6 +14,7 @@
 #include <linux/proc_fs.h>
 #include <linux/user.h>
 #include <linux/a.out.h>
+#include <linux/capability.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/vmalloc.h>
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index fb117b74809e..9bdd077d6f55 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -81,6 +81,30 @@ void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop
        __proc_device_tree_add_prop(pde, prop);
 }
+/*
+ * Drop the proc entry for @prop: calls remove_proc_entry() with the
+ * property's name and the parent entry @pde.
+ */
+void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
+                                  struct property *prop)
+{
+        remove_proc_entry(prop->name, pde);
+}
+/*
+ * Point the existing proc entry for @oldprop at @newprop.
+ *
+ * Walks the children of @pde looking for the entry whose ->data is
+ * @oldprop.  When found, its ->data and ->size are switched to @newprop
+ * and @newprop->length.  When no child refers to @oldprop a warning is
+ * logged and nothing is changed.
+ */
+void proc_device_tree_update_prop(struct proc_dir_entry *pde,
+                                  struct property *newprop,
+                                  struct property *oldprop)
+{
+        struct proc_dir_entry *ent;
+        for (ent = pde->subdir; ent != NULL; ent = ent->next)
+                if (ent->data == oldprop)
+                        break;
+        if (ent == NULL) {
+                /* Second literal no longer starts with a space (was a doubled blank). */
+                printk(KERN_WARNING "device-tree: property \"%s\" "
+                       "does not exist\n", oldprop->name);
+        } else {
+                ent->data = newprop;
+                ent->size = newprop->length;
+        }
+}
 /*
 * Process a node, adding entries for its children and its properties.
 */
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5b6b0b6038a7..8f8014285a34 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -20,6 +20,7 @@
 #include <linux/time.h>
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
+#include <linux/fs.h>
 #include <linux/tty.h>
 #include <linux/string.h>
 #include <linux/mman.h>
@@ -62,7 +63,6 @@
 */
 extern int get_hardware_list(char *);
 extern int get_stram_list(char *);
-extern int get_chrdev_list(char *);
 extern int get_filesystem_list(char *);
 extern int get_exec_domain_list(char *);
 extern int get_dma_list(char *);
@@ -248,6 +248,154 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &cpuinfo_op);
 }
+/* Phases of the "devices" listing, in the order devinfo_next() moves through them. */
+enum devinfo_states {
+        CHR_HDR,        /* devinfo_show() prints "Character devices:" and the current chrdev */
+        CHR_LIST,       /* devinfo_show() prints the current chrdev entry */
+        BLK_HDR,        /* devinfo_show() prints "Block devices:" and the current blkdev */
+        BLK_LIST,       /* devinfo_show() prints the current blkdev entry */
+        DEVINFO_DONE    /* nothing left to show; devinfo_next() returns NULL */
+};
+/* Iteration state allocated in devinfo_start() and freed in devinfo_stop(). */
+struct devinfo_state {
+        void *chrdev;                   /* cursor from acquire_chrdev_list(), advanced with get_next_chrdev() */
+        void *blkdev;                   /* cursor from acquire_blkdev_list(), advanced with get_next_blkdev() */
+        unsigned int num_records;       /* chrdev count + blkdev count + 2 header records */
+        unsigned int cur_record;        /* copy of the most recent *pos value */
+        enum devinfo_states state;      /* current phase of the listing */
+};
+/*
+ * seq_file ->start callback for the "devices" listing.
+ *
+ * With a non-zero *pos the state left in f->private is reused, provided it
+ * exists and *pos does not exceed num_records; otherwise NULL is returned.
+ * With *pos == 0 a fresh devinfo_state is allocated, both device lists are
+ * acquired, the record total is computed and *pos is set to 1.
+ *
+ * Returns the state pointer, or NULL when there is nothing to iterate or
+ * the allocation failed.
+ */
+static void *devinfo_start(struct seq_file *f, loff_t *pos)
+{
+        struct devinfo_state *info = f->private;
+        if (*pos) {
+                if ((info) && (*pos <= info->num_records))
+                        return info;
+                return NULL;
+        }
+        info = kmalloc(sizeof(*info), GFP_KERNEL);
+        f->private = info;
+        /* kmalloc() can fail: end the traversal instead of dereferencing NULL. */
+        if (!info)
+                return NULL;
+        info->chrdev = acquire_chrdev_list();
+        info->blkdev = acquire_blkdev_list();
+        info->state = CHR_HDR;
+        info->num_records = count_chrdev_list();
+        info->num_records += count_blkdev_list();
+        info->num_records += 2; /* Character and Block headers */
+        *pos = 1;
+        info->cur_record = *pos;
+        return info;
+}
+/*
+ * seq_file ->next callback: advance the listing by one record.
+ *
+ * A header phase prints its heading together with the current device entry
+ * (see devinfo_show()), so leaving a header phase must also step past that
+ * entry; both header cases therefore fall through into their list case.
+ * Previously BLK_HDR ended in "break", which left the block cursor on the
+ * entry already printed and made the first block device appear twice.
+ *
+ * Returns the state pointer, or NULL once DEVINFO_DONE has been consumed.
+ */
+static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
+{
+        int idummy;
+        char *ndummy;
+        struct devinfo_state *info = f->private;
+        switch (info->state) {
+                case CHR_HDR:
+                        info->state = CHR_LIST;
+                        (*pos)++;
+                        /*fallthrough*/
+                case CHR_LIST:
+                        if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) {
+                                /*
+                                 * The character dev list is complete
+                                 */
+                                info->state = BLK_HDR;
+                        } else {
+                                info->chrdev = get_next_chrdev(info->chrdev);
+                        }
+                        (*pos)++;
+                        break;
+                case BLK_HDR:
+                        info->state = BLK_LIST;
+                        (*pos)++;
+                        /*fallthrough*/
+                case BLK_LIST:
+                        if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
+                                /*
+                                 * The block dev list is complete
+                                 */
+                                info->state = DEVINFO_DONE;
+                        } else {
+                                info->blkdev = get_next_blkdev(info->blkdev);
+                        }
+                        (*pos)++;
+                        break;
+                case DEVINFO_DONE:
+                        (*pos)++;
+                        info->cur_record = *pos;
+                        info = NULL;
+                        break;
+                default:
+                        break;
+        }
+        /* Mirror the new position while iteration is still in progress. */
+        if (info)
+                info->cur_record = *pos;
+        return info;
+}
+/*
+ * seq_file ->stop callback: release both device lists and free the
+ * iteration state, clearing f->private.  Does nothing when no state is set.
+ */
+static void devinfo_stop(struct seq_file *f, void *v)
+{
+        struct devinfo_state *state = f->private;
+        if (!state)
+                return;
+        release_chrdev_list(state->chrdev);
+        release_blkdev_list(state->blkdev);
+        f->private = NULL;
+        kfree(state);
+}
+/*
+ * seq_file ->show callback: emit the record for the current phase.
+ *
+ * Header phases print their heading first; every character/block phase then
+ * prints "major name" for the current cursor if the lookup succeeds.
+ * Always returns 0.
+ */
+static int devinfo_show(struct seq_file *f, void *arg)
+{
+        struct devinfo_state *st = f->private;
+        int dev_major;
+        char *dev_name;
+        int have_entry;
+        if (st->state == CHR_HDR)
+                seq_printf(f,"Character devices:\n");
+        else if (st->state == BLK_HDR)
+                seq_printf(f,"\nBlock devices:\n");
+        switch (st->state) {
+        case CHR_HDR:
+        case CHR_LIST:
+                have_entry = !get_chrdev_info(st->chrdev,&dev_major,&dev_name);
+                break;
+        case BLK_HDR:
+        case BLK_LIST:
+                have_entry = !get_blkdev_info(st->blkdev,&dev_major,&dev_name);
+                break;
+        default:
+                have_entry = 0;
+                break;
+        }
+        if (have_entry)
+                seq_printf(f,"%3d %s\n",dev_major,dev_name);
+        return 0;
+}
+/* Iterator callbacks for the "devices" listing. */
+static  struct seq_operations devinfo_op = {
+        .start  = devinfo_start,
+        .next   = devinfo_next,
+        .stop   = devinfo_stop,
+        .show   = devinfo_show,
+};
+/* ->open handler: attach devinfo_op to @file through seq_open(). */
+static int devinfo_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &devinfo_op);
+}
+/* File operations registered for "devices" via create_seq_entry() in proc_misc_init(). */
+static struct file_operations proc_devinfo_operations = {
+        .open           = devinfo_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
 static struct file_operations proc_cpuinfo_operations = {
        .open           = cpuinfo_open,
        .read           = seq_read,
@@ -323,6 +471,7 @@ static struct file_operations proc_modules_operations = {
 };
 #endif
+#ifdef CONFIG_SLAB
 extern struct seq_operations slabinfo_op;
 extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
 static int slabinfo_open(struct inode *inode, struct file *file)
@@ -336,6 +485,7 @@ static struct file_operations proc_slabinfo_operations = {
        .llseek         = seq_lseek,
        .release        = seq_release,
 };
+#endif
 static int show_stat(struct seq_file *p, void *v)
 {
@@ -448,14 +598,6 @@ static struct file_operations proc_stat_operations = {
        .release        = single_release,
 };
-static int devices_read_proc(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data)
-{
-        int len = get_chrdev_list(page);
-        len += get_blkdev_list(page+len, len);
-        return proc_calc_metrics(page, start, off, count, eof, len);
-}
 /*
 * /proc/interrupts
 */
@@ -580,7 +722,6 @@ void __init proc_misc_init(void)
 #ifdef CONFIG_STRAM_PROC
                {"stram",       stram_read_proc},
 #endif
-                {"devices",     devices_read_proc},
                {"filesystems", filesystems_read_proc},
                {"cmdline",     cmdline_read_proc},
                {"locks",       locks_read_proc},
@@ -596,11 +737,14 @@ void __init proc_misc_init(void)
        entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
        if (entry)
                entry->proc_fops = &proc_kmsg_operations;
+        create_seq_entry("devices", 0, &proc_devinfo_operations);
        create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
        create_seq_entry("partitions", 0, &proc_partitions_operations);
        create_seq_entry("stat", 0, &proc_stat_operations);
        create_seq_entry("interrupts", 0, &proc_interrupts_operations);
+#ifdef CONFIG_SLAB
        create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+#endif
        create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
        create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
        create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index aef148f099a2..68896283c8ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,8 @@
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
+#include "internal.h"
 struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
 #ifdef CONFIG_SYSCTL
@@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = {
        .kill_sb        = kill_anon_super,
 };
-extern int __init proc_init_inodecache(void);
 void __init proc_root_init(void)
 {
        int err = proc_init_inodecache();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd5a8f0446..0eaad41f4658 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = {
 };
 #ifdef CONFIG_NUMA
+extern int show_numa_map(struct seq_file *m, void *v);
-struct numa_maps {
-        unsigned long pages;
-        unsigned long anon;
-        unsigned long mapped;
-        unsigned long mapcount_max;
-        unsigned long node[MAX_NUMNODES];
-};
-/*
- * Calculate numa node maps for a vma
- */
-static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
-{
-        int i;
-        struct page *page;
-        unsigned long vaddr;
-        struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
-        if (!md)
-                return NULL;
-        md->pages = 0;
-        md->anon = 0;
-        md->mapped = 0;
-        md->mapcount_max = 0;
-        for_each_node(i)
-                md->node[i] =0;
-        for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
-                page = follow_page(vma, vaddr, 0);
-                if (page) {
-                        int count = page_mapcount(page);
-                        if (count)
-                                md->mapped++;
-                        if (count > md->mapcount_max)
-                                md->mapcount_max = count;
-                        md->pages++;
-                        if (PageAnon(page))
-                                md->anon++;
-                        md->node[page_to_nid(page)]++;
-                }
-                cond_resched();
-        }
-        return md;
-}
-static int show_numa_map(struct seq_file *m, void *v)
-{
-        struct task_struct *task = m->private;
-        struct vm_area_struct *vma = v;
-        struct mempolicy *pol;
-        struct numa_maps *md;
-        struct zone **z;
-        int n;
-        int first;
-        if (!vma->vm_mm)
-                return 0;
-        md = get_numa_maps(vma);
-        if (!md)
-                return 0;
-        seq_printf(m, "%08lx", vma->vm_start);
-        pol = get_vma_policy(task, vma, vma->vm_start);
-        /* Print policy */
-        switch (pol->policy) {
-        case MPOL_PREFERRED:
-                seq_printf(m, " prefer=%d", pol->v.preferred_node);
-                break;
-        case MPOL_BIND:
-                seq_printf(m, " bind={");
-                first = 1;
-                for (z = pol->v.zonelist->zones; *z; z++) {
-                        if (!first)
-                                seq_putc(m, ',');
-                        else
-                                first = 0;
-                        seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
-                                        (*z)->name);
-                }
-                seq_putc(m, '}');
-                break;
-        case MPOL_INTERLEAVE:
-                seq_printf(m, " interleave={");
-                first = 1;
-                for_each_node(n) {
-                        if (node_isset(n, pol->v.nodes)) {
-                                if (!first)
-                                        seq_putc(m,',');
-                                else
-                                        first = 0;
-                                seq_printf(m, "%d",n);
-                        }
-                }
-                seq_putc(m, '}');
-                break;
-        default:
-                seq_printf(m," default");
-                break;
-        }
-        seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
-                        md->mapcount_max, md->pages, md->mapped);
-        if (md->anon)
-                seq_printf(m," Anon=%lu",md->anon);
-        for_each_online_node(n) {
-                if (md->node[n])
-                        seq_printf(m, " N%d=%lu", n, md->node[n]);
-        }
-        seq_putc(m, '\n');
-        kfree(md);
-        if (m->count < m->size)  /* vma is copied successfully */
-                m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
-        return 0;
-}
 struct seq_operations proc_pid_numa_maps_op = {
-        .start  = m_start,
+        .start  = m_start,
-        .next   = m_next,
+        .next   = m_next,
-        .stop   = m_stop,
+        .stop   = m_stop,
-        .show   = show_numa_map
+        .show   = show_numa_map
 };
 #endif
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 3b2e7b69e63a..4063fb32f78c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -14,7 +14,6 @@
 #include <linux/a.out.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
-#include <linux/proc_fs.h>
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
@@ -35,11 +34,14 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 struct proc_dir_entry *proc_vmcore = NULL;
 /* Reads a page from the oldmem device from given offset. */
 static ssize_t read_from_oldmem(char *buf, size_t count,
-                             loff_t *ppos, int userbuf)
+                                u64 *ppos, int userbuf)
 {
        unsigned long pfn, offset;
        size_t nr_bytes;
diff --git a/fs/quota.c b/fs/quota.c
index 612e04db4b93..ba9e0bf32f67 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -15,6 +15,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
+#include <linux/capability.h>
 #include <linux/quotaops.h>
 /* Check validity of generic quotactl commands */
@@ -168,7 +169,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
        sync_blockdev(sb->s_bdev);
        /* Now when everything is written we can discard the pagecache so
-         * that userspace sees the changes. We need i_sem and so we could
+         * that userspace sees the changes. We need i_mutex and so we could
         * not do it inside dqonoff_sem. Moreover we need to be carefull
         * about races with quotaoff() (that is the reason why we have own
         * reference to inode). */
@@ -184,9 +185,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
        up(&sb_dqopt(sb)->dqonoff_sem);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (discard[cnt]) {
-                        down(&discard[cnt]->i_sem);
+                        mutex_lock(&discard[cnt]->i_mutex);
                        truncate_inode_pages(&discard[cnt]->i_data, 0);
-                        up(&discard[cnt]->i_sem);
+                        mutex_unlock(&discard[cnt]->i_mutex);
                        iput(discard[cnt]);
                }
        }
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 7afcbb1b9376..a4ef91bb4f3b 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -35,7 +35,8 @@ static int v2_check_quota_file(struct super_block *sb, int type)
 
        size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
        if (size != sizeof(struct v2_disk_dqheader)) {
-                printk("failed read\n");
+                /*
+                 * sizeof yields size_t, so %d mismatches on 64-bit; size is
+                 * presumably ssize_t (declared outside this hunk) - confirm.
+                 */
+                printk("quota_v2: failed read expected=%zu got=%zd\n",
+                        sizeof(struct v2_disk_dqheader), size);
                return 0;
        }
        if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
diff --git a/fs/read_write.c b/fs/read_write.c
index df3468a22fea..3f7a1a62165f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -33,7 +33,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
        long long retval;
        struct inode *inode = file->f_mapping->host;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        switch (origin) {
                case 2:
                        offset += inode->i_size;
@@ -49,7 +49,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
                }
                retval = offset;
        }
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return retval;
 }
diff --git a/fs/readdir.c b/fs/readdir.c
index b03579bc0210..b6109329b607 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -30,13 +30,13 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf)
        if (res)
                goto out;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        res = -ENOENT;
        if (!IS_DEADDIR(inode)) {
                res = file->f_op->readdir(file, buf, filler);
                file_accessed(file);
        }
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
 out:
        return res;
 }
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7892a865b58a..ad6fa964b0e7 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -49,7 +49,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
        }
        reiserfs_write_lock(inode->i_sb);
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        /* freeing preallocation only involves relogging blocks that
         * are already in the current transaction.  preallocation gets
         * freed at the end of each transaction, so it is impossible for
@@ -100,7 +100,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
                err = reiserfs_truncate_file(inode, 0);
        }
      out:
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        reiserfs_write_unlock(inode->i_sb);
        return err;
 }
@@ -1342,7 +1342,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
                return -EFAULT;
-        down(&inode->i_sem);    // locks the entire file for just us
+        mutex_lock(&inode->i_mutex);    // locks the entire file for just us
        pos = *ppos;
@@ -1360,7 +1360,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
        if (res)
                goto out;
-        inode_update_time(inode, 1);    /* Both mtime and ctime */
+        file_update_time(file);
        // Ok, we are done with all the checks.
@@ -1532,12 +1532,12 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
                    generic_osync_inode(inode, file->f_mapping,
                                        OSYNC_METADATA | OSYNC_DATA);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        reiserfs_async_progress_wait(inode->i_sb);
        return (already_written != 0) ? already_written : res;
      out:
-        up(&inode->i_sem);      // unlock the file on exit.
+        mutex_unlock(&inode->i_mutex);  // unlock the file on exit.
        return res;
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a5e3a0ddbe53..ffa34b861bdb 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -40,12 +40,12 @@ void reiserfs_delete_inode(struct inode *inode)
        /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
        if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {  /* also handles bad_inode case */
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
                reiserfs_delete_xattrs(inode);
                if (journal_begin(&th, inode->i_sb, jbegin_count)) {
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                        goto out;
                }
                reiserfs_update_inode_transaction(inode);
@@ -59,11 +59,11 @@ void reiserfs_delete_inode(struct inode *inode)
                        DQUOT_FREE_INODE(inode);
                if (journal_end(&th, inode->i_sb, jbegin_count)) {
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                        goto out;
                }
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
                /* check return value from reiserfs_delete_object after
                 * ending the transaction
@@ -551,7 +551,7 @@ static int convert_tail_for_hole(struct inode *inode,
        /* we don't have to make sure the conversion did not happen while
         ** we were locking the page because anyone that could convert
-         ** must first take i_sem.
+         ** must first take i_mutex.
         **
         ** We must fix the tail page for writing because it might have buffers
         ** that are mapped, but have a block number of 0.  This indicates tail
@@ -586,7 +586,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th,
        BUG_ON(!th->t_trans_id);
 #ifdef REISERFS_PREALLOCATE
-        if (!(flags & GET_BLOCK_NO_ISEM)) {
+        if (!(flags & GET_BLOCK_NO_IMUX)) {
                return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
                                                  path, block);
        }
@@ -2318,7 +2318,7 @@ static int map_block_for_writepage(struct inode *inode,
        /* this is where we fill in holes in the file. */
        if (use_get_block) {
                retval = reiserfs_get_block(inode, block, bh_result,
-                                            GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM
+                                            GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
                                            | GET_BLOCK_NO_DANGLE);
                if (!retval) {
                        if (!buffer_mapped(bh_result)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 81fc00285f60..745c88100895 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -2,6 +2,7 @@
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/time.h>
@@ -120,7 +121,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
        /* we need to make sure nobody is changing the file size beneath
         ** us
         */
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        write_from = inode->i_size & (blocksize - 1);
        /* if we are on a block boundary, we are already unpacked.  */
@@ -156,7 +157,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
        page_cache_release(page);
      out:
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        reiserfs_write_unlock(inode->i_sb);
        return retval;
 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3f17ef844fb6..4491fcf2a0e6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -3925,10 +3925,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                flush = 1;
        }
 #ifdef REISERFS_PREALLOCATE
-        /* quota ops might need to nest, setup the journal_info pointer for them */
+        /* quota ops might need to nest, set up the journal_info pointer for them
+         * and raise the refcount so that it is > 0. */
        current->journal_info = th;
+        th->t_refcount++;
        reiserfs_discard_all_prealloc(th);      /* it should not involve new blocks into
                                                 * the transaction */
+        th->t_refcount--;
        current->journal_info = th->t_handle_save;
 #endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 3549067c42d9..8f8d8d01107c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -375,11 +375,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
                return ERR_PTR(-EIO);
        }
-        if (inode)
+        return d_splice_alias(inode, dentry);
-                return d_splice_alias(inode, dentry);
-        d_add(dentry, inode);
-        return NULL;
 }
 /* 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 42afb5bef111..397d9590c8f2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2211,7 +2211,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
        size_t towrite = len;
        struct buffer_head tmp_bh, *bh;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                    sb->s_blocksize - offset : towrite;
@@ -2250,7 +2250,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return len - towrite;
 }
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index c92e124f628e..196e971c03c9 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -205,7 +205,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
                                         1) * p_s_sb->s_blocksize;
        pos1 = pos;
-        // we are protected by i_sem. The tail can not disapper, not
+        // we are protected by i_mutex. The tail can not disappear, not
        // append can be done either
        // we are in truncate or packing tail in file_release
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 02091eaac0b4..cc061bfd437b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -30,6 +30,7 @@
 */
 #include <linux/reiserfs_fs.h>
+#include <linux/capability.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/errno.h>
@@ -67,11 +68,11 @@ static struct dentry *create_xa_root(struct super_block *sb)
                goto out;
        } else if (!xaroot->d_inode) {
                int err;
-                down(&privroot->d_inode->i_sem);
+                mutex_lock(&privroot->d_inode->i_mutex);
                err =
                    privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
                                                   0700);
-                up(&privroot->d_inode->i_sem);
+                mutex_unlock(&privroot->d_inode->i_mutex);
                if (err) {
                        dput(xaroot);
@@ -219,7 +220,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode,
        } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
                goto out;
        } else {
-                /* inode->i_sem is down, so nothing else can try to create
+                /* inode->i_mutex is held, so nothing else can try to create
                 * the same xattr */
                err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
                                                   0700 | S_IFREG, NULL);
@@ -268,7 +269,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name,
 * and don't mess with f->f_pos, but the idea is the same.  Do some
 * action on each and every entry in the directory.
 *
- * we're called with i_sem held, so there are no worries about the directory
+ * we're called with i_mutex held, so there are no worries about the directory
 * changing underneath us.
 */
 static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -426,7 +427,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
        int res = -ENOTDIR;
        if (!file->f_op || !file->f_op->readdir)
                goto out;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
 //        down(&inode->i_zombie);
        res = -ENOENT;
        if (!IS_DEADDIR(inode)) {
@@ -435,7 +436,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
                unlock_kernel();
        }
 //        up(&inode->i_zombie);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
      out:
        return res;
 }
@@ -480,7 +481,7 @@ static inline __u32 xattr_hash(const char *msg, int len)
 /* Generic extended attribute operations that can be used by xa plugins */
 /*
- * inode->i_sem: down
+ * inode->i_mutex: locked
 */
 int
 reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
@@ -497,12 +498,6 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
        struct iattr newattrs;
        __u32 xahash = 0;
-        if (IS_RDONLY(inode))
-                return -EROFS;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
        if (get_inode_sd_version(inode) == STAT_DATA_V1)
                return -EOPNOTSUPP;
@@ -535,7 +530,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
        /* Resize it so we're ok to write there */
        newattrs.ia_size = buffer_size;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-        down(&xinode->i_sem);
+        mutex_lock(&xinode->i_mutex);
        err = notify_change(fp->f_dentry, &newattrs);
        if (err)
                goto out_filp;
@@ -598,7 +593,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
        }
      out_filp:
-        up(&xinode->i_sem);
+        mutex_unlock(&xinode->i_mutex);
        fput(fp);
      out:
@@ -606,7 +601,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 }
 /*
- * inode->i_sem: down
+ * inode->i_mutex: locked
 */
 int
 reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
@@ -758,9 +753,6 @@ int reiserfs_xattr_del(struct inode *inode, const char *name)
        struct dentry *dir;
        int err;
-        if (IS_RDONLY(inode))
-                return -EROFS;
        dir = open_xa_dir(inode, FL_READONLY);
        if (IS_ERR(dir)) {
                err = PTR_ERR(dir);
@@ -793,7 +785,7 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
 }
-/* This is called w/ inode->i_sem downed */
+/* This is called w/ inode->i_mutex held */
 int reiserfs_delete_xattrs(struct inode *inode)
 {
        struct file *fp;
@@ -946,7 +938,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
 /*
 * Inode operation getxattr()
- * Preliminary locking: we down dentry->d_inode->i_sem
+ * Preliminary locking: we lock dentry->d_inode->i_mutex
 */
 ssize_t
 reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
@@ -970,7 +962,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 /*
 * Inode operation setxattr()
 *
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_mutex locked
 */
 int
 reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
@@ -984,12 +976,6 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
            get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
                return -EOPNOTSUPP;
-        if (IS_RDONLY(dentry->d_inode))
-                return -EROFS;
-        if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
-                return -EROFS;
        reiserfs_write_lock_xattr_i(dentry->d_inode);
        lock = !has_xattr_dir(dentry->d_inode);
        if (lock)
@@ -1008,7 +994,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 /*
 * Inode operation removexattr()
 *
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_mutex locked
 */
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
@@ -1019,12 +1005,6 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name)
            get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
                return -EOPNOTSUPP;
-        if (IS_RDONLY(dentry->d_inode))
-                return -EROFS;
-        if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
-                return -EPERM;
        reiserfs_write_lock_xattr_i(dentry->d_inode);
        reiserfs_read_lock_xattrs(dentry->d_sb);
@@ -1091,7 +1071,7 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
 /*
 * Inode operation listxattr()
 *
- * Preliminary locking: we down dentry->d_inode->i_sem
+ * Preliminary locking: we lock dentry->d_inode->i_mutex
 */
 ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 {
@@ -1289,9 +1269,9 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                if (!IS_ERR(dentry)) {
                        if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
                                struct inode *inode = dentry->d_parent->d_inode;
-                                down(&inode->i_sem);
+                                mutex_lock(&inode->i_mutex);
                                err = inode->i_op->mkdir(inode, dentry, 0700);
-                                up(&inode->i_sem);
+                                mutex_unlock(&inode->i_mutex);
                                if (err) {
                                        dput(dentry);
                                        dentry = NULL;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a47ac9aac8b2..43de3ba83332 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,3 +1,4 @@
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/posix_acl.h>
 #include <linux/reiserfs_fs.h>
@@ -174,7 +175,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 /*
 * Inode operation get_posix_acl().
 *
- * inode->i_sem: down
+ * inode->i_mutex: locked
 * BKL held [before 2.5.x]
 */
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
@@ -237,7 +238,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 /*
 * Inode operation set_posix_acl().
 *
- * inode->i_sem: down
+ * inode->i_mutex: locked
 * BKL held [before 2.5.x]
 */
 static int
@@ -312,7 +313,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        return error;
 }
-/* dir->i_sem: down,
+/* dir->i_mutex: locked,
 * inode is new and not released into the wild yet */
 int
 reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 2501f7e66ab9..024a938ca60f 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,4 +1,5 @@
 #include <linux/reiserfs_fs.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 51458048ca66..073f39364b11 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -16,18 +16,10 @@ static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
-        int error;
        if (strlen(name) < sizeof(XATTR_USER_PREFIX))
                return -EINVAL;
        if (!reiserfs_xattrs_user(inode->i_sb))
                return -EOPNOTSUPP;
-        error = reiserfs_permission_locked(inode, MAY_READ, NULL);
-        if (error)
-                return error;
        return reiserfs_xattr_get(inode, name, buffer, size);
 }
@@ -36,43 +28,21 @@ user_set(struct inode *inode, const char *name, const void *buffer,
         size_t size, int flags)
 {
-        int error;
        if (strlen(name) < sizeof(XATTR_USER_PREFIX))
                return -EINVAL;
        if (!reiserfs_xattrs_user(inode->i_sb))
                return -EOPNOTSUPP;
-        if (!S_ISREG(inode->i_mode) &&
-            (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-                return -EPERM;
-        error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
        return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 static int user_del(struct inode *inode, const char *name)
 {
-        int error;
        if (strlen(name) < sizeof(XATTR_USER_PREFIX))
                return -EINVAL;
        if (!reiserfs_xattrs_user(inode->i_sb))
                return -EOPNOTSUPP;
-        if (!S_ISREG(inode->i_mode) &&
-            (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-                return -EPERM;
-        error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
        return 0;
 }
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
index 84e21ffa5ca8..10187812771e 100644
--- a/fs/relayfs/buffers.c
+++ b/fs/relayfs/buffers.c
@@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf)
 void relay_remove_buf(struct kref *kref)
 {
        struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
-        relayfs_remove(buf->dentry);
+        buf->chan->cb->remove_buf_file(buf->dentry);
+        relay_destroy_buf(buf);
 }
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
index 0f7f88d067ad..383523011aad 100644
--- a/fs/relayfs/inode.c
+++ b/fs/relayfs/inode.c
@@ -26,31 +26,22 @@
 static struct vfsmount *                relayfs_mount;
 static int                              relayfs_mount_count;
-static kmem_cache_t *                   relayfs_inode_cachep;
 static struct backing_dev_info          relayfs_backing_dev_info = {
        .ra_pages       = 0,    /* No readahead */
        .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
-static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
+static struct inode *relayfs_get_inode(struct super_block *sb,
-                                       struct rchan *chan)
+                                       int mode,
+                                       struct file_operations *fops,
+                                       void *data)
 {
-        struct rchan_buf *buf = NULL;
        struct inode *inode;
-        if (S_ISREG(mode)) {
-                BUG_ON(!chan);
-                buf = relay_create_buf(chan);
-                if (!buf)
-                        return NULL;
-        }
        inode = new_inode(sb);
-        if (!inode) {
+        if (!inode)
-                relay_destroy_buf(buf);
                return NULL;
-        }
        inode->i_mode = mode;
        inode->i_uid = 0;
@@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        switch (mode & S_IFMT) {
        case S_IFREG:
-                inode->i_fop = &relayfs_file_operations;
+                inode->i_fop = fops;
-                RELAYFS_I(inode)->buf = buf;
+                if (data)
+                        inode->u.generic_ip = data;
                break;
        case S_IFDIR:
                inode->i_op = &simple_dir_inode_operations;
@@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
 *      @name: the name of the file to create
 *      @parent: parent directory
 *      @mode: mode
- *      @chan: relay channel associated with the file
+ *      @fops: file operations to use for the file
+ *      @data: user-associated data for this file
 *
 *      Returns the new dentry, NULL on failure
 *
@@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
 static struct dentry *relayfs_create_entry(const char *name,
                                           struct dentry *parent,
                                           int mode,
-                                           struct rchan *chan)
+                                           struct file_operations *fops,
+                                           void *data)
 {
        struct dentry *d;
        struct inode *inode;
@@ -115,7 +109,7 @@ static struct dentry *relayfs_create_entry(const char *name,
        }
        parent = dget(parent);
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        d = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(d)) {
                d = NULL;
@@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name,
                goto release_mount;
        }
-        inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
+        inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
        if (!inode) {
                d = NULL;
                goto release_mount;
@@ -145,7 +139,7 @@ release_mount:
        simple_release_fs(&relayfs_mount, &relayfs_mount_count);
 exit:
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        dput(parent);
        return d;
 }
@@ -155,20 +149,26 @@ exit:
 *      @name: the name of the file to create
 *      @parent: parent directory
 *      @mode: mode, if not specied the default perms are used
- *      @chan: channel associated with the file
+ *      @fops: file operations to use for the file
+ *      @data: user-associated data for this file
 *
 *      Returns file dentry if successful, NULL otherwise.
 *
 *      The file will be created user r on behalf of current user.
 */
-struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
+struct dentry *relayfs_create_file(const char *name,
-                                   int mode, struct rchan *chan)
+                                   struct dentry *parent,
+                                   int mode,
+                                   struct file_operations *fops,
+                                   void *data)
 {
+        BUG_ON(!fops);
        if (!mode)
                mode = S_IRUSR;
        mode = (mode & S_IALLUGO) | S_IFREG;
-        return relayfs_create_entry(name, parent, mode, chan);
+        return relayfs_create_entry(name, parent, mode, fops, data);
 }
 /**
@@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
 struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
 {
        int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-        return relayfs_create_entry(name, parent, mode, NULL);
+        return relayfs_create_entry(name, parent, mode, NULL, NULL);
 }
 /**
@@ -204,7 +204,7 @@ int relayfs_remove(struct dentry *dentry)
                return -EINVAL;
        parent = dget(parent);
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        if (dentry->d_inode) {
                if (S_ISDIR(dentry->d_inode->i_mode))
                        error = simple_rmdir(parent->d_inode, dentry);
@@ -215,7 +215,7 @@ int relayfs_remove(struct dentry *dentry)
        }
        if (!error)
                dput(dentry);
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        dput(parent);
        if (!error)
@@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry)
 }
 /**
+ *      relayfs_remove_file - remove a file from relay filesystem
+ *      @dentry: file dentry
+ *
+ *      Returns 0 if successful, negative otherwise.
+ */
+int relayfs_remove_file(struct dentry *dentry)
+{
+        return relayfs_remove(dentry);
+}
+/**
 *      relayfs_remove_dir - remove a directory in the relay filesystem
 *      @dentry: directory dentry
 *
@@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry)
 }
 /**
- *      relayfs_open - open file op for relayfs files
+ *      relay_file_open - open file op for relay files
 *      @inode: the inode
 *      @filp: the file
 *
 *      Increments the channel buffer refcount.
 */
-static int relayfs_open(struct inode *inode, struct file *filp)
+static int relay_file_open(struct inode *inode, struct file *filp)
 {
-        struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+        struct rchan_buf *buf = inode->u.generic_ip;
        kref_get(&buf->kref);
+        filp->private_data = buf;
        return 0;
 }
 /**
- *      relayfs_mmap - mmap file op for relayfs files
+ *      relay_file_mmap - mmap file op for relay files
 *      @filp: the file
 *      @vma: the vma describing what to map
 *
 *      Calls upon relay_mmap_buf to map the file into user space.
 */
-static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
+static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-        struct inode *inode = filp->f_dentry->d_inode;
+        struct rchan_buf *buf = filp->private_data;
-        return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
+        return relay_mmap_buf(buf, vma);
 }
 /**
- *      relayfs_poll - poll file op for relayfs files
+ *      relay_file_poll - poll file op for relay files
 *      @filp: the file
 *      @wait: poll table
 *
 *      Poll implemention.
 */
-static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
+static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
 {
        unsigned int mask = 0;
-        struct inode *inode = filp->f_dentry->d_inode;
+        struct rchan_buf *buf = filp->private_data;
-        struct rchan_buf *buf = RELAYFS_I(inode)->buf;
        if (buf->finalized)
                return POLLERR;
@@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
 }
 /**
- *      relayfs_release - release file op for relayfs files
+ *      relay_file_release - release file op for relay files
 *      @inode: the inode
 *      @filp: the file
 *
 *      Decrements the channel refcount, as the filesystem is
 *      no longer using it.
 */
-static int relayfs_release(struct inode *inode, struct file *filp)
+static int relay_file_release(struct inode *inode, struct file *filp)
 {
-        struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+        struct rchan_buf *buf = filp->private_data;
        kref_put(&buf->kref, relay_remove_buf);
        return 0;
 }
 /**
- *      relayfs_read_consume - update the consumed count for the buffer
+ *      relay_file_read_consume - update the consumed count for the buffer
 */
-static void relayfs_read_consume(struct rchan_buf *buf,
+static void relay_file_read_consume(struct rchan_buf *buf,
-                                 size_t read_pos,
+                                    size_t read_pos,
-                                 size_t bytes_consumed)
+                                    size_t bytes_consumed)
 {
        size_t subbuf_size = buf->chan->subbuf_size;
        size_t n_subbufs = buf->chan->n_subbufs;
@@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf,
 }
 /**
- *      relayfs_read_avail - boolean, are there unconsumed bytes available?
+ *      relay_file_read_avail - boolean, are there unconsumed bytes available?
 */
-static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
+static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
 {
        size_t bytes_produced, bytes_consumed, write_offset;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
        if (bytes_produced == bytes_consumed)
                return 0;
-        relayfs_read_consume(buf, read_pos, 0);
+        relay_file_read_consume(buf, read_pos, 0);
        return 1;
 }
 /**
- *      relayfs_read_subbuf_avail - return bytes available in sub-buffer
+ *      relay_file_read_subbuf_avail - return bytes available in sub-buffer
 */
-static size_t relayfs_read_subbuf_avail(size_t read_pos,
+static size_t relay_file_read_subbuf_avail(size_t read_pos,
-                                        struct rchan_buf *buf)
+                                           struct rchan_buf *buf)
 {
        size_t padding, avail = 0;
        size_t read_subbuf, read_offset, write_subbuf, write_offset;
@@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos,
 }
 /**
- *      relayfs_read_start_pos - find the first available byte to read
+ *      relay_file_read_start_pos - find the first available byte to read
 *
 *      If the read_pos is in the middle of padding, return the
 *      position of the first actually available byte, otherwise
 *      return the original value.
 */
-static size_t relayfs_read_start_pos(size_t read_pos,
+static size_t relay_file_read_start_pos(size_t read_pos,
-                                     struct rchan_buf *buf)
+                                        struct rchan_buf *buf)
 {
        size_t read_subbuf, padding, padding_start, padding_end;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos,
 }
 /**
- *      relayfs_read_end_pos - return the new read position
+ *      relay_file_read_end_pos - return the new read position
 */
-static size_t relayfs_read_end_pos(struct rchan_buf *buf,
+static size_t relay_file_read_end_pos(struct rchan_buf *buf,
-                                   size_t read_pos,
+                                      size_t read_pos,
-                                   size_t count)
+                                      size_t count)
 {
        size_t read_subbuf, padding, end_pos;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
 }
 /**
- *      relayfs_read - read file op for relayfs files
+ *      relay_file_read - read file op for relay files
 *      @filp: the file
 *      @buffer: the userspace buffer
 *      @count: number of bytes to read
@@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
 *      Reads count bytes or the number of bytes available in the
 *      current sub-buffer being read, whichever is smaller.
 */
-static ssize_t relayfs_read(struct file *filp,
+static ssize_t relay_file_read(struct file *filp,
-                            char __user *buffer,
+                               char __user *buffer,
-                            size_t count,
+                               size_t count,
-                            loff_t *ppos)
+                               loff_t *ppos)
 {
+        struct rchan_buf *buf = filp->private_data;
        struct inode *inode = filp->f_dentry->d_inode;
-        struct rchan_buf *buf = RELAYFS_I(inode)->buf;
        size_t read_start, avail;
        ssize_t ret = 0;
        void *from;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
-        if(!relayfs_read_avail(buf, *ppos))
+        if(!relay_file_read_avail(buf, *ppos))
                goto out;
-        read_start = relayfs_read_start_pos(*ppos, buf);
+        read_start = relay_file_read_start_pos(*ppos, buf);
-        avail = relayfs_read_subbuf_avail(read_start, buf);
+        avail = relay_file_read_subbuf_avail(read_start, buf);
        if (!avail)
                goto out;
@@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp,
                ret = -EFAULT;
                goto out;
        }
-        relayfs_read_consume(buf, read_start, count);
+        relay_file_read_consume(buf, read_start, count);
-        *ppos = relayfs_read_end_pos(buf, read_start, count);
+        *ppos = relay_file_read_end_pos(buf, read_start, count);
 out:
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return ret;
 }
-/**
+struct file_operations relay_file_operations = {
- *      relayfs alloc_inode() implementation
+        .open           = relay_file_open,
- */
+        .poll           = relay_file_poll,
-static struct inode *relayfs_alloc_inode(struct super_block *sb)
+        .mmap           = relay_file_mmap,
-{
+        .read           = relay_file_read,
-        struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
-        if (!p)
-                return NULL;
-        p->buf = NULL;
-        return &p->vfs_inode;
-}
-/**
- *      relayfs destroy_inode() implementation
- */
-static void relayfs_destroy_inode(struct inode *inode)
-{
-        if (RELAYFS_I(inode)->buf)
-                relay_destroy_buf(RELAYFS_I(inode)->buf);
-        kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
-}
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
-{
-        struct relayfs_inode_info *i = p;
-        if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
-                inode_init_once(&i->vfs_inode);
-}
-struct file_operations relayfs_file_operations = {
-        .open           = relayfs_open,
-        .poll           = relayfs_poll,
-        .mmap           = relayfs_mmap,
-        .read           = relayfs_read,
        .llseek         = no_llseek,
-        .release        = relayfs_release,
+        .release        = relay_file_release,
 };
 static struct super_operations relayfs_ops = {
        .statfs         = simple_statfs,
        .drop_inode     = generic_delete_inode,
-        .alloc_inode    = relayfs_alloc_inode,
-        .destroy_inode  = relayfs_destroy_inode,
 };
 static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
@@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
        sb->s_magic = RELAYFS_MAGIC;
        sb->s_op = &relayfs_ops;
-        inode = relayfs_get_inode(sb, mode, NULL);
+        inode = relayfs_get_inode(sb, mode, NULL, NULL);
        if (!inode)
                return -ENOMEM;
@@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = {
 static int __init init_relayfs_fs(void)
 {
-        int err;
+        return register_filesystem(&relayfs_fs_type);
-        relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
-                                sizeof(struct relayfs_inode_info), 0,
-                                0, init_once, NULL);
-        if (!relayfs_inode_cachep)
-                return -ENOMEM;
-        err = register_filesystem(&relayfs_fs_type);
-        if (err)
-                kmem_cache_destroy(relayfs_inode_cachep);
-        return err;
 }
 static void __exit exit_relayfs_fs(void)
 {
        unregister_filesystem(&relayfs_fs_type);
-        kmem_cache_destroy(relayfs_inode_cachep);
 }
 module_init(init_relayfs_fs)
 module_exit(exit_relayfs_fs)
-EXPORT_SYMBOL_GPL(relayfs_file_operations);
+EXPORT_SYMBOL_GPL(relay_file_operations);
 EXPORT_SYMBOL_GPL(relayfs_create_dir);
 EXPORT_SYMBOL_GPL(relayfs_remove_dir);
+EXPORT_SYMBOL_GPL(relayfs_create_file);
+EXPORT_SYMBOL_GPL(relayfs_remove_file);
 MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
 MODULE_DESCRIPTION("Relay Filesystem");
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
index 2a6f7f12b7f9..abf3ceaace49 100644
--- a/fs/relayfs/relay.c
+++ b/fs/relayfs/relay.c
@@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
 {
 }
+/*
+ * create_buf_file() default callback.  Creates file to represent buf.
+ *
+ * Used for the create_buf_file slot of struct rchan_callbacks when the
+ * client does not supply its own.  filename, parent and mode are passed
+ * unchanged to relayfs_create_file(), together with relay_file_operations
+ * and buf.  is_global is accepted to match the callback signature but is
+ * not touched by this default implementation.
+ *
+ * Returns whatever relayfs_create_file() returns.
+ */
+static struct dentry *create_buf_file_default_callback(const char *filename,
+                                                       struct dentry *parent,
+                                                       int mode,
+                                                       struct rchan_buf *buf,
+                                                       int *is_global)
+{
+        return relayfs_create_file(filename, parent, mode,
+                                   &relay_file_operations, buf);
+}
+/*
+ * remove_buf_file() default callback.  Removes file representing relay buffer.
+ *
+ * Used for the remove_buf_file slot of struct rchan_callbacks when the
+ * client does not supply its own.  Hands dentry to relayfs_remove() and
+ * returns its result.
+ */
+static int remove_buf_file_default_callback(struct dentry *dentry)
+{
+        return relayfs_remove(dentry);
+}
 /* relay channel default callbacks */
 static struct rchan_callbacks default_channel_callbacks = {
        .subbuf_start = subbuf_start_default_callback,
        .buf_mapped = buf_mapped_default_callback,
        .buf_unmapped = buf_unmapped_default_callback,
+        .create_buf_file = create_buf_file_default_callback,
+        .remove_buf_file = remove_buf_file_default_callback,
 };
 /**
@@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
 void relay_reset(struct rchan *chan)
 {
        unsigned int i;
+        struct rchan_buf *prev = NULL;
        if (!chan)
                return;
        for (i = 0; i < NR_CPUS; i++) {
-                if (!chan->buf[i])
+                if (!chan->buf[i] || chan->buf[i] == prev)
-                        continue;
+                        break;
                __relay_reset(chan->buf[i], 0);
+                prev = chan->buf[i];
        }
 }
@@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan)
 */
 static struct rchan_buf *relay_open_buf(struct rchan *chan,
                                        const char *filename,
-                                        struct dentry *parent)
+                                        struct dentry *parent,
+                                        int *is_global)
 {
        struct rchan_buf *buf;
        struct dentry *dentry;
+        if (*is_global)
+                return chan->buf[0];
+        buf = relay_create_buf(chan);
+        if (!buf)
+                return NULL;
        /* Create file in fs */
-        dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
+        dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
-        if (!dentry)
+                                           buf, is_global);
+        if (!dentry) {
+                relay_destroy_buf(buf);
                return NULL;
+        }
-        buf = RELAYFS_I(dentry->d_inode)->buf;
        buf->dentry = dentry;
        __relay_reset(buf, 1);
@@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan,
                cb->buf_mapped = buf_mapped_default_callback;
        if (!cb->buf_unmapped)
                cb->buf_unmapped = buf_unmapped_default_callback;
+        if (!cb->create_buf_file)
+                cb->create_buf_file = create_buf_file_default_callback;
+        if (!cb->remove_buf_file)
+                cb->remove_buf_file = remove_buf_file_default_callback;
        chan->cb = cb;
 }
@@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename,
        unsigned int i;
        struct rchan *chan;
        char *tmpname;
+        int is_global = 0;
        if (!base_filename)
                return NULL;
@@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename,
        for_each_online_cpu(i) {
                sprintf(tmpname, "%s%d", base_filename, i);
-                chan->buf[i] = relay_open_buf(chan, tmpname, parent);
+                chan->buf[i] = relay_open_buf(chan, tmpname, parent,
+                                              &is_global);
                chan->buf[i]->cpu = i;
                if (!chan->buf[i])
                        goto free_bufs;
@@ -279,6 +320,8 @@ free_bufs:
                if (!chan->buf[i])
                        break;
                relay_close_buf(chan->buf[i]);
+                if (is_global)
+                        break;
        }
        kfree(tmpname);
@@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref)
 void relay_close(struct rchan *chan)
 {
        unsigned int i;
+        struct rchan_buf *prev = NULL;
        if (!chan)
                return;
        for (i = 0; i < NR_CPUS; i++) {
-                if (!chan->buf[i])
+                if (!chan->buf[i] || chan->buf[i] == prev)
-                        continue;
+                        break;
                relay_close_buf(chan->buf[i]);
+                prev = chan->buf[i];
        }
        if (chan->last_toobig)
@@ -415,14 +460,16 @@ void relay_close(struct rchan *chan)
 void relay_flush(struct rchan *chan)
 {
        unsigned int i;
+        struct rchan_buf *prev = NULL;
        if (!chan)
                return;
        for (i = 0; i < NR_CPUS; i++) {
-                if (!chan->buf[i])
+                if (!chan->buf[i] || chan->buf[i] == prev)
-                        continue;
+                        break;
                relay_switch_subbuf(chan->buf[i], 0);
+                prev = chan->buf[i];
        }
 }
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
index 703503fa22b6..0993d3e5753b 100644
--- a/fs/relayfs/relay.h
+++ b/fs/relayfs/relay.h
@@ -1,10 +1,6 @@
 #ifndef _RELAY_H
 #define _RELAY_H
-struct dentry *relayfs_create_file(const char *name,
-                                   struct dentry *parent,
-                                   int mode,
-                                   struct rchan *chan);
 extern int relayfs_remove(struct dentry *dentry);
 extern int relay_buf_empty(struct rchan_buf *buf);
 extern void relay_destroy_channel(struct kref *kref);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c74f382dabba..0a13859fd57b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,7 @@ static int
 romfs_readpage(struct file *file, struct page * page)
 {
        struct inode *inode = page->mapping->host;
-        unsigned long offset, avail, readlen;
+        loff_t offset, avail, readlen;
        void *buf;
        int result = -EIO;
@@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page)
                goto err_out;
        /* 32 bit warning -- but not for us :) */
-        offset = page->index << PAGE_CACHE_SHIFT;
+        offset = page_offset(page);
-        if (offset < inode->i_size) {
+        if (offset < i_size_read(inode)) {
                avail = inode->i_size-offset;
                readlen = min_t(unsigned long, avail, PAGE_SIZE);
                if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
diff --git a/fs/select.c b/fs/select.c
index f10a10317d54..c0f02d36c60e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -179,12 +179,11 @@ get_max:
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
-int do_select(int n, fd_set_bits *fds, long *timeout)
+int do_select(int n, fd_set_bits *fds, s64 *timeout)
 {
        struct poll_wqueues table;
        poll_table *wait;
        int retval, i;
-        long __timeout = *timeout;
        rcu_read_lock();
        retval = max_select_fd(n, fds);
@@ -196,11 +195,12 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
        poll_initwait(&table);
        wait = &table.pt;
-        if (!__timeout)
+        if (!*timeout)
                wait = NULL;
        retval = 0;
        for (;;) {
                unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
+                long __timeout;
                set_current_state(TASK_INTERRUPTIBLE);
@@ -255,22 +255,32 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
                                *rexp = res_ex;
                }
                wait = NULL;
-                if (retval || !__timeout || signal_pending(current))
+                if (retval || !*timeout || signal_pending(current))
                        break;
                if(table.error) {
                        retval = table.error;
                        break;
                }
+                if (*timeout < 0) {
+                        /* Wait indefinitely */
+                        __timeout = MAX_SCHEDULE_TIMEOUT;
+                } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
+                        /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
+                        __timeout = MAX_SCHEDULE_TIMEOUT - 1;
+                        *timeout -= __timeout;
+                } else {
+                        __timeout = *timeout;
+                        *timeout = 0;
+                }
                __timeout = schedule_timeout(__timeout);
+                if (*timeout >= 0)
+                        *timeout += __timeout;
        }
        __set_current_state(TASK_RUNNING);
        poll_freewait(&table);
-        /*
-         * Up-to-date the caller timeout.
-         */
-        *timeout = __timeout;
        return retval;
 }
@@ -295,36 +305,14 @@ static void select_bits_free(void *bits, int size)
 #define MAX_SELECT_SECONDS \
        ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
-asmlinkage long
+static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
-sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+                           fd_set __user *exp, s64 *timeout)
 {
        fd_set_bits fds;
        char *bits;
-        long timeout;
        int ret, size, max_fdset;
        struct fdtable *fdt;
-        timeout = MAX_SCHEDULE_TIMEOUT;
-        if (tvp) {
-                time_t sec, usec;
-                if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-                    || __get_user(sec, &tvp->tv_sec)
-                    || __get_user(usec, &tvp->tv_usec)) {
-                        ret = -EFAULT;
-                        goto out_nofds;
-                }
-                ret = -EINVAL;
-                if (sec < 0 || usec < 0)
-                        goto out_nofds;
-                if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-                        timeout = ROUND_UP(usec, 1000000/HZ);
-                        timeout += sec * (unsigned long) HZ;
-                }
-        }
        ret = -EINVAL;
        if (n < 0)
                goto out_nofds;
@@ -362,18 +350,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);
-        ret = do_select(n, &fds, &timeout);
+        ret = do_select(n, &fds, timeout);
-        if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-                time_t sec = 0, usec = 0;
-                if (timeout) {
-                        sec = timeout / HZ;
-                        usec = timeout % HZ;
-                        usec *= (1000000/HZ);
-                }
-                put_user(sec, &tvp->tv_sec);
-                put_user(usec, &tvp->tv_usec);
-        }
        if (ret < 0)
                goto out;
@@ -395,6 +372,154 @@ out_nofds:
        return ret;
 }
+asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+                        fd_set __user *exp, struct timeval __user *tvp)
+{
+        s64 timeout = -1;       /* in jiffies; a negative value means "wait
+                                 * indefinitely" and is kept when tvp is NULL */
+        struct timeval tv;
+        int ret;
+        if (tvp) {
+                if (copy_from_user(&tv, tvp, sizeof(tv)))
+                        return -EFAULT;
+                if (tv.tv_sec < 0 || tv.tv_usec < 0)
+                        return -EINVAL;
+                /* Cast to u64 to make GCC stop complaining */
+                if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
+                        timeout = -1;   /* infinite */
+                else {
+                        timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);        /* sub-second part, in jiffies */
+                        timeout += tv.tv_sec * HZ;      /* plus the whole seconds */
+                }
+        }
+        ret = core_sys_select(n, inp, outp, exp, &timeout);     /* timeout is passed by pointer and may be rewritten */
+        if (tvp) {
+                if (current->personality & STICKY_TIMEOUTS)
+                        goto sticky;    /* skip the write-back to tvp entirely */
+                tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+                tv.tv_sec = timeout;    /* NOTE(review): presumably do_div() left
+                                         * the whole seconds in timeout and
+                                         * returned the leftover jiffies - confirm.
+                                         * timeout can still be -1 here (the
+                                         * "infinite" case above); sys_ppoll()
+                                         * checks timeout >= 0 before doing this
+                                         * conversion, this path does not. */
+                if (copy_to_user(tvp, &tv, sizeof(tv))) {
+sticky:
+                        /*
+                         * Reached either through the STICKY_TIMEOUTS goto
+                         * above or when copy_to_user() fails.
+                         *
+                         * If an application puts its timeval in read-only
+                         * memory, we don't want the Linux-specific update to
+                         * the timeval to cause a fault after the select has
+                         * completed successfully. However, because we're not
+                         * updating the timeval, we can't restart the system
+                         * call.
+                         */
+                        if (ret == -ERESTARTNOHAND)
+                                ret = -EINTR;
+                }
+        }
+        return ret;
+}
+#ifdef TIF_RESTORE_SIGMASK
+asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
+                fd_set __user *exp, struct timespec __user *tsp,
+                const sigset_t __user *sigmask, size_t sigsetsize)
+{
+        s64 timeout = MAX_SCHEDULE_TIMEOUT;     /* in jiffies; used as-is when
+                                                 * tsp is NULL.  NOTE(review):
+                                                 * sys_select() and sys_ppoll()
+                                                 * start from -1 instead -
+                                                 * confirm the difference is
+                                                 * intended. */
+        sigset_t ksigmask, sigsaved;
+        struct timespec ts;
+        int ret;
+        if (tsp) {
+                if (copy_from_user(&ts, tsp, sizeof(ts)))
+                        return -EFAULT;
+                if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+                        return -EINVAL;
+                /* Cast to u64 to make GCC stop complaining */
+                if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
+                        timeout = -1;   /* infinite */
+                else {
+                        timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);        /* sub-second part, in jiffies */
+                        timeout += ts.tv_sec * HZ;      /* plus the whole seconds */
+                }
+        }
+        if (sigmask) {
+                /* XXX: Don't preclude handling different sized sigset_t's.  */
+                if (sigsetsize != sizeof(sigset_t))
+                        return -EINVAL;
+                if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+                        return -EFAULT;
+                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));    /* SIGKILL/SIGSTOP are taken out of the requested mask */
+                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); /* install it; previous mask is kept in sigsaved */
+        }
+        ret = core_sys_select(n, inp, outp, exp, &timeout);     /* timeout is passed by pointer and may be rewritten */
+        if (tsp) {
+                if (current->personality & STICKY_TIMEOUTS)
+                        goto sticky;    /* skip the write-back to tsp entirely */
+                ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
+                ts.tv_sec = timeout;    /* NOTE(review): timeout may still be -1
+                                         * here (the "infinite" case above);
+                                         * there is no timeout >= 0 check before
+                                         * the u64 reinterpretation - confirm. */
+                if (copy_to_user(tsp, &ts, sizeof(ts))) {
+sticky:
+                        /*
+                         * Reached either through the STICKY_TIMEOUTS goto
+                         * above or when copy_to_user() fails.
+                         *
+                         * If an application puts its timespec in read-only
+                         * memory, we don't want the Linux-specific update to
+                         * the timespec to cause a fault after the select has
+                         * completed successfully. However, because we're not
+                         * updating the timespec, we can't restart the system
+                         * call.
+                         */
+                        if (ret == -ERESTARTNOHAND)
+                                ret = -EINTR;
+                }
+        }
+        if (ret == -ERESTARTNOHAND) {
+                /*
+                 * Don't restore the signal mask yet. Let do_signal() deliver
+                 * the signal on the way back to userspace, before the signal
+                 * mask is restored.
+                 */
+                if (sigmask) {
+                        memcpy(&current->saved_sigmask, &sigsaved,
+                                        sizeof(sigsaved));
+                        set_thread_flag(TIF_RESTORE_SIGMASK);
+                }
+        } else if (sigmask)
+                sigprocmask(SIG_SETMASK, &sigsaved, NULL);      /* any other result: put the saved mask back now */
+        return ret;
+}
+/*
+ * Most architectures can't handle 7-argument syscalls. So we provide a
+ * 6-argument version where the sixth argument is a pointer to a structure
+ * which has a pointer to the sigset_t itself followed by a size_t containing
+ * the sigset size.
+ *
+ * When sig is non-NULL, sizeof(void *) + sizeof(size_t) bytes at sig are
+ * checked for readability and the two fields are fetched from user space;
+ * any failure there returns -EFAULT.  When sig is NULL, the mask pointer
+ * stays NULL and the size stays 0.  Everything else is forwarded unchanged
+ * to sys_pselect7(), whose result is returned.
+ */
+asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
+        fd_set __user *exp, struct timespec __user *tsp, void __user *sig)
+{
+        size_t sigsetsize = 0;
+        sigset_t __user *up = NULL;
+        if (sig) {
+                if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
+                    || __get_user(up, (sigset_t * __user *)sig)
+                    || __get_user(sigsetsize,
+                                (size_t * __user)(sig+sizeof(void *))))        /* the size_t sits right after the pointer */
+                        return -EFAULT;
+        }
+        return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
+}
+#endif /* TIF_RESTORE_SIGMASK */
 struct poll_list {
        struct poll_list *next;
        int len;
@@ -436,16 +561,19 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage,
 }
 static int do_poll(unsigned int nfds,  struct poll_list *list,
-                        struct poll_wqueues *wait, long timeout)
+                   struct poll_wqueues *wait, s64 *timeout)
 {
        int count = 0;
        poll_table* pt = &wait->pt;
-        if (!timeout)
+        /* Optimise the no-wait case */
+        if (!(*timeout))
                pt = NULL;
 
        for (;;) {
                struct poll_list *walk;
+                long __timeout;
                set_current_state(TASK_INTERRUPTIBLE);
                walk = list;
                while(walk != NULL) {
@@ -453,18 +581,36 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
                        walk = walk->next;
                }
                pt = NULL;
-                if (count || !timeout || signal_pending(current))
+                if (count || !*timeout || signal_pending(current))
                        break;
                count = wait->error;
                if (count)
                        break;
-                timeout = schedule_timeout(timeout);
+                if (*timeout < 0) {
+                        /* Wait indefinitely */
+                        __timeout = MAX_SCHEDULE_TIMEOUT;
+                } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
+                        /*
+                         * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
+                         * a loop
+                         */
+                        __timeout = MAX_SCHEDULE_TIMEOUT - 1;
+                        *timeout -= __timeout;
+                } else {
+                        __timeout = *timeout;
+                        *timeout = 0;
+                }
+                __timeout = schedule_timeout(__timeout);
+                if (*timeout >= 0)
+                        *timeout += __timeout;
        }
        __set_current_state(TASK_RUNNING);
        return count;
 }
-asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
+int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 {
        struct poll_wqueues table;
        int fdcount, err;
@@ -482,14 +628,6 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
        if (nfds > max_fdset && nfds > OPEN_MAX)
                return -EINVAL;
-        if (timeout) {
-                /* Careful about overflow in the intermediate values */
-                if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
-                        timeout = (unsigned long)(timeout*HZ+999)/1000+1;
-                else /* Negative or overflow */
-                        timeout = MAX_SCHEDULE_TIMEOUT;
-        }
        poll_initwait(&table);
        head = NULL;
@@ -519,6 +657,7 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
                }
                i -= pp->len;
        }
        fdcount = do_poll(nfds, head, &table, timeout);
        /* OK, now copy the revents fields back to user space. */
@@ -547,3 +686,98 @@ out_fds:
        poll_freewait(&table);
        return err;
 }
+asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
+                        long timeout_msecs)
+{
+        s64 timeout_jiffies = 0;        /* 0 is the no-wait case in do_poll() */
+        if (timeout_msecs) {
+#if HZ > 1000
+                /* We can only overflow if HZ > 1000 */
+                if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
+                        timeout_jiffies = -1;   /* negative: do_poll() waits indefinitely */
+                else
+#endif
+                        timeout_jiffies = msecs_to_jiffies(timeout_msecs);      /* NOTE(review): what a negative
+                                                                                 * timeout_msecs turns into depends
+                                                                                 * on msecs_to_jiffies() - confirm */
+        }
+        return do_sys_poll(ufds, nfds, &timeout_jiffies);
+}
+#ifdef TIF_RESTORE_SIGMASK
+asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
+        struct timespec __user *tsp, const sigset_t __user *sigmask,
+        size_t sigsetsize)
+{
+        sigset_t ksigmask, sigsaved;
+        struct timespec ts;
+        s64 timeout = -1;       /* in jiffies; negative means "wait indefinitely"
+                                 * and is kept when tsp is NULL */
+        int ret;
+        if (tsp) {
+                if (copy_from_user(&ts, tsp, sizeof(ts)))
+                        return -EFAULT;
+                /*
+                 * Cast to u64 to make GCC stop complaining.
+                 * NOTE(review): unlike sys_pselect7() there is no explicit
+                 * rejection of negative tv_sec / tv_nsec here; a negative
+                 * tv_sec presumably lands in the "infinite" branch through
+                 * the u64 cast, a negative tv_nsec is used as-is - confirm.
+                 */
+                if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
+                        timeout = -1;   /* infinite */
+                else {
+                        timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);        /* sub-second part, in jiffies */
+                        timeout += ts.tv_sec * HZ;      /* plus the whole seconds */
+                }
+        }
+        if (sigmask) {
+                /* XXX: Don't preclude handling different sized sigset_t's.  */
+                if (sigsetsize != sizeof(sigset_t))
+                        return -EINVAL;
+                if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+                        return -EFAULT;
+                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));    /* SIGKILL/SIGSTOP are taken out of the requested mask */
+                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); /* install it; previous mask is kept in sigsaved */
+        }
+        ret = do_sys_poll(ufds, nfds, &timeout);        /* timeout is passed by pointer and may be rewritten */
+        /* We can restart this syscall, usually */
+        if (ret == -EINTR) {
+                /*
+                 * Don't restore the signal mask yet. Let do_signal() deliver
+                 * the signal on the way back to userspace, before the signal
+                 * mask is restored.
+                 */
+                if (sigmask) {
+                        memcpy(&current->saved_sigmask, &sigsaved,
+                                        sizeof(sigsaved));
+                        set_thread_flag(TIF_RESTORE_SIGMASK);
+                }
+                ret = -ERESTARTNOHAND;
+        } else if (sigmask)
+                sigprocmask(SIG_SETMASK, &sigsaved, NULL);      /* any other result: put the saved mask back now */
+        if (tsp && timeout >= 0) {
+                if (current->personality & STICKY_TIMEOUTS)
+                        goto sticky;    /* skip the write-back to tsp entirely */
+                /* Yes, we know it's actually an s64, but it's also positive. */
+                ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
+                ts.tv_sec = timeout;
+                if (copy_to_user(tsp, &ts, sizeof(ts))) {
+                sticky:
+                        /*
+                         * Reached either through the STICKY_TIMEOUTS goto
+                         * above or when copy_to_user() fails.
+                         *
+                         * If an application puts its timespec in read-only
+                         * memory, we don't want the Linux-specific update to
+                         * the timespec to cause a fault after the poll has
+                         * completed successfully. However, because we're not
+                         * updating the timespec, we can't restart the system
+                         * call.
+                         */
+                        if (ret == -ERESTARTNOHAND && timeout >= 0)
+                                ret = -EINTR;
+                }
+        }
+        return ret;
+}
+#endif /* TIF_RESTORE_SIGMASK */
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
index 93246b7dd6fb..6673ee82cb4c 100644
--- a/fs/smbfs/Makefile
+++ b/fs/smbfs/Makefile
@@ -13,7 +13,6 @@ smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
 EXTRA_CFLAGS += -DSMBFS_PARANOIA
 #EXTRA_CFLAGS += -DSMBFS_DEBUG
 #EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
-#EXTRA_CFLAGS += -DDEBUG_SMB_MALLOC
 #EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
 #EXTRA_CFLAGS += -Werror
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index f3e6b81288ab..74b86d9725a6 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-                dentry = list_entry(next, struct dentry, d_child);
+                dentry = list_entry(next, struct dentry, d_u.d_child);
                dentry->d_fsdata = NULL;
                smb_age_dentry(server, dentry);
                next = next->next;
@@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-                dent = list_entry(next, struct dentry, d_child);
+                dent = list_entry(next, struct dentry, d_u.d_child);
                if ((unsigned long)dent->d_fsdata == fpos) {
                        if (dent->d_inode)
                                dget_locked(dent);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index b4fcfa8b55a1..7042e62726a4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset,
 {
        struct dentry *dentry = file->f_dentry;
-        DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), 
+        DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
-               count, (page->index << PAGE_CACHE_SHIFT)+offset);
+                ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
        return smb_writepage_sync(dentry->d_inode, page, offset, count);
 }
@@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file)
                /* We must flush any dirty pages now as we won't be able to
                   write anything after close. mmap can trigger this.
                   "openers" should perhaps include mmap'ers ... */
-                filemap_fdatawrite(inode->i_mapping);
+                filemap_write_and_wait(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
                smb_close(inode);
        }
        unlock_kernel();
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 10b994428fef..02e3e82d465c 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -487,11 +487,11 @@ smb_put_super(struct super_block *sb)
        if (server->conn_pid)
                kill_proc(server->conn_pid, SIGTERM, 1);
-        smb_kfree(server->ops);
+        kfree(server->ops);
        smb_unload_nls(server);
        sb->s_fs_info = NULL;
        smb_unlock_server(server);
-        smb_kfree(server);
+        kfree(server);
 }
 static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
@@ -519,11 +519,10 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
        sb->s_op = &smb_sops;
        sb->s_time_gran = 100;
-        server = smb_kmalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
+        server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
        if (!server)
                goto out_no_server;
        sb->s_fs_info = server;
-        memset(server, 0, sizeof(struct smb_sb_info));
        server->super_block = sb;
        server->mnt = NULL;
@@ -542,8 +541,8 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
        /* FIXME: move these to the smb_sb_info struct */
        VERBOSE("alloc chunk = %d\n", sizeof(struct smb_ops) +
                sizeof(struct smb_mount_data_kernel));
-        mem = smb_kmalloc(sizeof(struct smb_ops) +
+        mem = kmalloc(sizeof(struct smb_ops) +
-                          sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
+                      sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
        if (!mem)
                goto out_no_mem;
@@ -621,12 +620,12 @@ out_no_root:
 out_no_smbiod:
        smb_unload_nls(server);
 out_bad_option:
-        smb_kfree(mem);
+        kfree(mem);
 out_no_mem:
        if (!server->mnt)
                printk(KERN_ERR "smb_fill_super: allocation failure\n");
        sb->s_fs_info = NULL;
-        smb_kfree(server);
+        kfree(server);
        goto out_fail;
 out_wrong_data:
        printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
@@ -697,8 +696,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
                        DENTRY_PATH(dentry),
                        (long) inode->i_size, (long) attr->ia_size);
-                filemap_fdatawrite(inode->i_mapping);
+                filemap_write_and_wait(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
                error = smb_open(dentry, O_WRONLY);
                if (error)
@@ -783,12 +781,6 @@ out:
        return error;
 }
-#ifdef DEBUG_SMB_MALLOC
-int smb_malloced;
-int smb_current_kmalloced;
-int smb_current_vmalloced;
-#endif
 static struct super_block *smb_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
@@ -808,12 +800,6 @@ static int __init init_smb_fs(void)
        int err;
        DEBUG1("registering ...\n");
-#ifdef DEBUG_SMB_MALLOC
-        smb_malloced = 0;
-        smb_current_kmalloced = 0;
-        smb_current_vmalloced = 0;
-#endif
        err = init_inodecache();
        if (err)
                goto out_inode;
@@ -838,11 +824,6 @@ static void __exit exit_smb_fs(void)
        unregister_filesystem(&smb_fs_type);
        smb_destroy_request_cache();
        destroy_inodecache();
-#ifdef DEBUG_SMB_MALLOC
-        printk(KERN_DEBUG "smb_malloced: %d\n", smb_malloced);
-        printk(KERN_DEBUG "smb_current_kmalloced: %d\n",smb_current_kmalloced);
-        printk(KERN_DEBUG "smb_current_vmalloced: %d\n",smb_current_vmalloced);
-#endif
 }
 module_init(init_smb_fs)
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 38ab558835c4..b1b878b81730 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -8,6 +8,7 @@
 */
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -3113,7 +3114,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
        LSET(data, 32, SMB_TIME_NO_CHANGE);
        LSET(data, 40, SMB_UID_NO_CHANGE);
        LSET(data, 48, SMB_GID_NO_CHANGE);
-        LSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
+        DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
        LSET(data, 60, major);
        LSET(data, 68, minor);
        LSET(data, 76, 0);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index a0f296d9928a..c71c375863cc 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -68,7 +68,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
                goto out;
        if (bufsize > 0) {
-                buf = smb_kmalloc(bufsize, GFP_NOFS);
+                buf = kmalloc(bufsize, GFP_NOFS);
                if (!buf) {
                        kmem_cache_free(req_cachep, req);
                        return NULL;
@@ -124,9 +124,8 @@ static void smb_free_request(struct smb_request *req)
 {
        atomic_dec(&req->rq_server->nr_requests);
        if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
-                smb_kfree(req->rq_buffer);
+                kfree(req->rq_buffer);
-        if (req->rq_trans2buffer)
+        kfree(req->rq_trans2buffer);
-                smb_kfree(req->rq_trans2buffer);
        kmem_cache_free(req_cachep, req);
 }
@@ -183,8 +182,7 @@ static int smb_setup_request(struct smb_request *req)
        req->rq_err = 0;
        req->rq_errno = 0;
        req->rq_fragment = 0;
-        if (req->rq_trans2buffer)
+        kfree(req->rq_trans2buffer);
-                smb_kfree(req->rq_trans2buffer);
        return 0;
 }
@@ -647,10 +645,9 @@ static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
                        goto out_too_long;
                req->rq_trans2bufsize = buf_len;
-                req->rq_trans2buffer = smb_kmalloc(buf_len, GFP_NOFS);
+                req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
                if (!req->rq_trans2buffer)
                        goto out_no_mem;
-                memset(req->rq_trans2buffer, 0, buf_len);
                req->rq_parm = req->rq_trans2buffer;
                req->rq_data = req->rq_trans2buffer + parm_tot;
diff --git a/fs/stat.c b/fs/stat.c
index b8a0e5110ab2..24211b030f39 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -63,12 +63,12 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 EXPORT_SYMBOL(vfs_getattr);
-int vfs_stat(char __user *name, struct kstat *stat)
+int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 {
        struct nameidata nd;
        int error;
-        error = user_path_walk(name, &nd);
+        error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
        if (!error) {
                error = vfs_getattr(nd.mnt, nd.dentry, stat);
                path_release(&nd);
@@ -76,14 +76,19 @@ int vfs_stat(char __user *name, struct kstat *stat)
        return error;
 }
+/*
+ * vfs_stat - compatibility wrapper around vfs_stat_fd()
+ *
+ * Passes AT_FDCWD as the directory descriptor and returns whatever
+ * vfs_stat_fd() returns; @name and @stat are forwarded untouched.
+ */
+int vfs_stat(char __user *name, struct kstat *stat)
+{
+        return vfs_stat_fd(AT_FDCWD, name, stat);
+}
 EXPORT_SYMBOL(vfs_stat);
-int vfs_lstat(char __user *name, struct kstat *stat)
+int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
 {
        struct nameidata nd;
        int error;
-        error = user_path_walk_link(name, &nd);
+        error = __user_walk_fd(dfd, name, 0, &nd);
        if (!error) {
                error = vfs_getattr(nd.mnt, nd.dentry, stat);
                path_release(&nd);
@@ -91,6 +96,11 @@ int vfs_lstat(char __user *name, struct kstat *stat)
        return error;
 }
+/*
+ * vfs_lstat - compatibility wrapper around vfs_lstat_fd()
+ *
+ * Passes AT_FDCWD as the directory descriptor and returns whatever
+ * vfs_lstat_fd() returns; @name and @stat are forwarded untouched.
+ */
+int vfs_lstat(char __user *name, struct kstat *stat)
+{
+        return vfs_lstat_fd(AT_FDCWD, name, stat);
+}
 EXPORT_SYMBOL(vfs_lstat);
 int vfs_fstat(unsigned int fd, struct kstat *stat)
@@ -151,7 +161,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
 asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user * statbuf)
 {
        struct kstat stat;
-        int error = vfs_stat(filename, &stat);
+        int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
        if (!error)
                error = cp_old_stat(&stat, statbuf);
@@ -161,7 +171,7 @@ asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user
 asmlinkage long sys_lstat(char __user * filename, struct __old_kernel_stat __user * statbuf)
 {
        struct kstat stat;
-        int error = vfs_lstat(filename, &stat);
+        int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
        if (!error)
                error = cp_old_stat(&stat, statbuf);
@@ -229,27 +239,50 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
        return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
 }
-asmlinkage long sys_newstat(char __user * filename, struct stat __user * statbuf)
+/*
+ * sys_newstat - look up @filename through vfs_stat_fd() with AT_FDCWD and,
+ * when that succeeds, pass the result to cp_new_stat() to fill @statbuf.
+ * Returns the lookup error if there is one, otherwise cp_new_stat()'s result.
+ */
+asmlinkage long sys_newstat(char __user *filename, struct stat __user *statbuf)
 {
        struct kstat stat;
-        int error = vfs_stat(filename, &stat);
+        int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
        if (!error)
                error = cp_new_stat(&stat, statbuf);
        return error;
 }
-asmlinkage long sys_newlstat(char __user * filename, struct stat __user * statbuf)
+/*
+ * sys_newlstat - look up @filename through vfs_lstat_fd() with AT_FDCWD and,
+ * when that succeeds, pass the result to cp_new_stat() to fill @statbuf.
+ * Returns the lookup error if there is one, otherwise cp_new_stat()'s result.
+ */
+asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
 {
        struct kstat stat;
-        int error = vfs_lstat(filename, &stat);
+        int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
        if (!error)
                error = cp_new_stat(&stat, statbuf);
        return error;
 }
-asmlinkage long sys_newfstat(unsigned int fd, struct stat __user * statbuf)
+/*
+ * sys_newfstatat - stat a path relative to directory descriptor @dfd
+ * @dfd:      directory descriptor handed to vfs_stat_fd()/vfs_lstat_fd()
+ * @filename: user-space path name
+ * @statbuf:  user-space buffer filled in by cp_new_stat()
+ * @flag:     0 or AT_SYMLINK_NOFOLLOW; any other bit is rejected
+ *
+ * Returns -EINVAL for unsupported flag bits, otherwise the lookup error
+ * or, when the lookup succeeds, the result of cp_new_stat().
+ */
+asmlinkage long sys_newfstatat(int dfd, char __user *filename,
+                                struct stat __user *statbuf, int flag)
+{
+        struct kstat stat;
+        int error;
+        /* AT_SYMLINK_NOFOLLOW is the only flag bit accepted here */
+        if (flag & ~AT_SYMLINK_NOFOLLOW)
+                return -EINVAL;
+        /* NOFOLLOW selects vfs_lstat_fd(), everything else vfs_stat_fd() */
+        error = (flag & AT_SYMLINK_NOFOLLOW) ?
+                        vfs_lstat_fd(dfd, filename, &stat) :
+                        vfs_stat_fd(dfd, filename, &stat);
+        if (error)
+                return error;
+        return cp_new_stat(&stat, statbuf);
+}
+asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
 {
        struct kstat stat;
        int error = vfs_fstat(fd, &stat);
@@ -260,7 +293,8 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user * statbuf)
        return error;
 }
-asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bufsiz)
+asmlinkage long sys_readlinkat(int dfd, const char __user *path,
+                                char __user *buf, int bufsiz)
 {
        struct nameidata nd;
        int error;
@@ -268,7 +302,7 @@ asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bu
        if (bufsiz <= 0)
                return -EINVAL;
-        error = user_path_walk_link(path, &nd);
+        error = __user_walk_fd(dfd, path, 0, &nd);
        if (!error) {
                struct inode * inode = nd.dentry->d_inode;
@@ -285,6 +319,12 @@ asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bu
        return error;
 }
+/*
+ * sys_readlink - readlink entry point without a directory descriptor
+ *
+ * Forwards to sys_readlinkat() with AT_FDCWD as @dfd; @path, @buf and
+ * @bufsiz are passed through and the return value is returned as is.
+ */
+asmlinkage long sys_readlink(const char __user *path, char __user *buf,
+                                int bufsiz)
+{
+        return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
+}
 /* ---------- LFS-64 ----------- */
 #ifdef __ARCH_WANT_STAT64
diff --git a/fs/super.c b/fs/super.c
index 5a347a4f673a..c177b92419c5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,7 +72,7 @@ static struct super_block *alloc_super(void)
                INIT_HLIST_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
                init_rwsem(&s->s_umount);
-                sema_init(&s->s_lock, 1);
+                mutex_init(&s->s_lock);
                down_write(&s->s_umount);
                s->s_count = S_BIAS;
                atomic_set(&s->s_active, 1);
@@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
                s->s_flags = flags;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-                s->s_old_blocksize = block_size(bdev);
+                sb_set_blocksize(s, block_size(bdev));
-                sb_set_blocksize(s, s->s_old_blocksize);
                error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
                if (error) {
                        up_write(&s->s_umount);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index d36780382176..49bd219275db 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -99,7 +99,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
        int error;
        umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
-        down(&p->d_inode->i_sem);
+        mutex_lock(&p->d_inode->i_mutex);
        *d = lookup_one_len(n, p, strlen(n));
        if (!IS_ERR(*d)) {
                error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
@@ -122,7 +122,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
                dput(*d);
        } else
                error = PTR_ERR(*d);
-        up(&p->d_inode->i_sem);
+        mutex_unlock(&p->d_inode->i_mutex);
        return error;
 }
@@ -246,7 +246,7 @@ static void remove_dir(struct dentry * d)
        struct dentry * parent = dget(d->d_parent);
        struct sysfs_dirent * sd;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        d_delete(d);
        sd = d->d_fsdata;
        list_del_init(&sd->s_sibling);
@@ -257,7 +257,7 @@ static void remove_dir(struct dentry * d)
        pr_debug(" o %s removing done (%d)\n",d->d_name.name,
                 atomic_read(&d->d_count));
-        up(&parent->d_inode->i_sem);
+        mutex_unlock(&parent->d_inode->i_mutex);
        dput(parent);
 }
@@ -286,7 +286,7 @@ void sysfs_remove_dir(struct kobject * kobj)
                return;
        pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        parent_sd = dentry->d_fsdata;
        list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
@@ -295,7 +295,7 @@ void sysfs_remove_dir(struct kobject * kobj)
                sysfs_drop_dentry(sd, dentry);
                sysfs_put(sd);
        }
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        remove_dir(dentry);
        /**
@@ -318,7 +318,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
        down_write(&sysfs_rename_sem);
        parent = kobj->parent->dentry;
-        down(&parent->d_inode->i_sem);
+        mutex_lock(&parent->d_inode->i_mutex);
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
@@ -334,7 +334,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-        up(&parent->d_inode->i_sem);    
+        mutex_unlock(&parent->d_inode->i_mutex);
        up_write(&sysfs_rename_sem);
        return error;
@@ -345,9 +345,9 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_dentry;
        struct sysfs_dirent * parent_sd = dentry->d_fsdata;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        file->private_data = sysfs_new_dirent(parent_sd, NULL);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return file->private_data ? 0 : -ENOMEM;
@@ -358,9 +358,9 @@ static int sysfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_dentry;
        struct sysfs_dirent * cursor = file->private_data;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        list_del_init(&cursor->s_sibling);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        release_sysfs_dirent(cursor);
@@ -436,7 +436,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
        struct dentry * dentry = file->f_dentry;
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        switch (origin) {
                case 1:
                        offset += file->f_pos;
@@ -444,7 +444,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
                        if (offset >= 0)
                                break;
                default:
-                        up(&file->f_dentry->d_inode->i_sem);
+                        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -468,7 +468,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
                        list_add_tail(&cursor->s_sibling, p);
                }
        }
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return offset;
 }
@@ -483,4 +483,3 @@ struct file_operations sysfs_dir_operations = {
 EXPORT_SYMBOL_GPL(sysfs_create_dir);
 EXPORT_SYMBOL_GPL(sysfs_remove_dir);
 EXPORT_SYMBOL_GPL(sysfs_rename_dir);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4013d7905e84..d0e3d8495165 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -364,9 +364,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
        umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
        int error = 0;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return error;
 }
@@ -398,7 +398,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
        struct dentry * victim;
        int res = -ENOENT;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
        if (!IS_ERR(victim)) {
                /* make sure dentry is really there */
@@ -420,7 +420,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
                 */
                dput(victim);
        }
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return res;
 }
@@ -441,22 +441,22 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
        struct iattr newattrs;
        int res = -ENOENT;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
        if (!IS_ERR(victim)) {
                if (victim->d_inode &&
                    (victim->d_parent->d_inode == dir->d_inode)) {
                        inode = victim->d_inode;
-                        down(&inode->i_sem);
+                        mutex_lock(&inode->i_mutex);
                        newattrs.ia_mode = (mode & S_IALLUGO) |
                                                (inode->i_mode & ~S_IALLUGO);
                        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
                        res = notify_change(victim, &newattrs);
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                }
                dput(victim);
        }
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
        return res;
 }
@@ -480,4 +480,3 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 EXPORT_SYMBOL_GPL(sysfs_create_file);
 EXPORT_SYMBOL_GPL(sysfs_remove_file);
 EXPORT_SYMBOL_GPL(sysfs_update_file);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 970a33f03299..689f7bcfaf30 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -11,6 +11,7 @@
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
+#include <linux/capability.h>
 #include "sysfs.h"
 extern struct super_block * sysfs_sb;
@@ -201,7 +202,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
 /*
 * Unhashes the dentry corresponding to given sysfs_dirent
- * Called with parent inode's i_sem held.
+ * Called with parent inode's i_mutex held.
 */
 void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
 {
@@ -232,7 +233,7 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
                /* no inode means this hasn't been made visible yet */
                return;
-        down(&dir->d_inode->i_sem);
+        mutex_lock(&dir->d_inode->i_mutex);
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -243,7 +244,5 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-        up(&dir->d_inode->i_sem);
+        mutex_unlock(&dir->d_inode->i_mutex);
 }
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index de402fa915f2..e38d6338a20d 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -86,9 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
        BUG_ON(!kobj || !kobj->dentry || !name);
-        down(&dentry->d_inode->i_sem);
+        mutex_lock(&dentry->d_inode->i_mutex);
        error = sysfs_add_link(dentry, name, target);
-        up(&dentry->d_inode->i_sem);
+        mutex_unlock(&dentry->d_inode->i_mutex);
        return error;
 }
@@ -177,4 +177,3 @@ struct inode_operations sysfs_symlink_inode_operations = {
 EXPORT_SYMBOL_GPL(sysfs_create_link);
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
diff --git a/fs/sysv/ChangeLog b/fs/sysv/ChangeLog
index 18e3487debdb..f403f8b91b80 100644
--- a/fs/sysv/ChangeLog
+++ b/fs/sysv/ChangeLog
@@ -54,7 +54,7 @@ Fri Jan  4 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
          (sysv_read_super): Likewise.
          (v7_read_super): Likewise.
-Sun Dec 30 2001  Manfred Spraul  <manfreds@colorfullife.com>
+Sun Dec 30 2001  Manfred Spraul  <manfred@colorfullife.com>
        * dir.c (dir_commit_chunk): Do not set dir->i_version.
        (sysv_readdir): Likewise.
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 69a085abad6f..cce8b05cba5a 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        offset = (char *)de - kaddr;
                        over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN),
-                                        (n<<PAGE_CACHE_SHIFT) | offset,
+                                        ((loff_t)n<<PAGE_CACHE_SHIFT) | offset,
                                        fs16_to_cpu(SYSV_SB(sb), de->inode),
                                        DT_UNKNOWN);
                        if (over) {
@@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
        }
 done:
-        filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
+        filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
        unlock_kernel();
        return 0;
 }
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 6598a5037ac8..4fae57d9d115 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -41,7 +41,7 @@
 #define uint(x) xuint(x)
 #define xuint(x) __le ## x
-extern inline int find_next_one_bit (void * addr, int size, int offset)
+static inline int find_next_one_bit (void * addr, int size, int offset)
 {
        uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG);
        int result = offset & ~(BITS_PER_LONG-1);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 8a388289040d..a6f2acc1f15c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -31,6 +31,7 @@
 #include <asm/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/string.h> /* memset */
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4014f17d382e..395e582ee542 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
                printk(KERN_ERR "udf: inode_bmap: block < 0\n");
                return -1;
        }
-        if (!inode)
-        {
-                printk(KERN_ERR "udf: inode_bmap: NULL inode\n");
-                return -1;
-        }
        *extoffset = 0;
        *elen = 0;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index faf1512173eb..3ada9dcf55b8 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/capability.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
 #include <asm/byteorder.h>
@@ -48,7 +49,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
        
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        
        UFSD(("ENTER, fragment %u, count %u\n", fragment, count))
        
@@ -80,8 +81,9 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
        for (i = bit; i < end_bit; i++) {
                if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, i))
                        ubh_setbit (UCPI_UBH, ucpi->c_freeoff, i);
-                else ufs_error (sb, "ufs_free_fragments",
+                else 
-                        "bit already cleared for fragment %u", i);
+                        ufs_error (sb, "ufs_free_fragments",
+                                   "bit already cleared for fragment %u", i);
        }
        
        DQUOT_FREE_BLOCK (inode, count);
@@ -142,7 +144,7 @@ void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) {
        
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        UFSD(("ENTER, fragment %u, count %u\n", fragment, count))
        
@@ -246,7 +248,7 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
        
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        *err = -ENOSPC;
        lock_super (sb);
@@ -406,7 +408,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
        
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first (USPI_UBH);
+        usb1 = ubh_get_usb_first (uspi);
        count = newcount - oldcount;
        
        cgno = ufs_dtog(fragment);
@@ -489,7 +491,7 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno,
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        oldcg = cgno;
        
        /*
@@ -605,7 +607,7 @@ static unsigned ufs_alloccg_block (struct inode * inode,
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        ucg = ubh_get_ucg(UCPI_UBH);
        if (goal == 0) {
@@ -662,7 +664,7 @@ static unsigned ufs_bitmap_search (struct super_block * sb,
        UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count))
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first (USPI_UBH);
+        usb1 = ubh_get_usb_first (uspi);
        ucg = ubh_get_ucg(UCPI_UBH);
        if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 0938945b9cbc..c7a47ed4f430 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -72,7 +72,7 @@ void ufs_free_inode (struct inode * inode)
        sb = inode->i_sb;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        
        ino = inode->i_ino;
@@ -167,7 +167,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
        ufsi = UFS_I(inode);
        sbi = UFS_SB(sb);
        uspi = sbi->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        lock_super (sb);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 55f4aa16e3fc..e0c04e36a051 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -61,7 +61,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
        int n = 0;
-        UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%d \n",ptrs,double_blocks));
+        UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks));
        if (i_block < 0) {
                ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
        } else if (i_block < direct_blocks) {
@@ -104,7 +104,7 @@ u64  ufs_frag_map(struct inode *inode, sector_t frag)
        unsigned flags = UFS_SB(sb)->s_flags;
        u64 temp = 0L;
-        UFSD((": frag = %lu  depth = %d\n",frag,depth));
+        UFSD((": frag = %llu  depth = %d\n", (unsigned long long)frag, depth));
        UFSD((": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask));
        if (depth == 0)
@@ -365,9 +365,10 @@ repeat:
                sync_dirty_buffer(bh);
        inode->i_ctime = CURRENT_TIME_SEC;
        mark_inode_dirty(inode);
+        UFSD(("result %u\n", tmp + blockoff));
 out:
        brelse (bh);
-        UFSD(("EXIT, result %u\n", tmp + blockoff))
+        UFSD(("EXIT\n"));
        return result;
 }
@@ -386,7 +387,7 @@ static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf
        
        if (!create) {
                phys64 = ufs_frag_map(inode, fragment);
-                UFSD(("phys64 = %lu \n",phys64));
+                UFSD(("phys64 = %llu \n",phys64));
                if (phys64)
                        map_bh(bh_result, sb, phys64);
                return 0;
@@ -401,7 +402,7 @@ static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf
        lock_kernel();
-        UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment))
+        UFSD(("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment))
        if (fragment < 0)
                goto abort_negative;
        if (fragment >
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 54828ebcf1ba..d4aacee593ff 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -221,7 +221,7 @@ void ufs_error (struct super_block * sb, const char * function,
        va_list args;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        
        if (!(sb->s_flags & MS_RDONLY)) {
                usb1->fs_clean = UFS_FSBAD;
@@ -253,7 +253,7 @@ void ufs_panic (struct super_block * sb, const char * function,
        va_list args;
        
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
        
        if (!(sb->s_flags & MS_RDONLY)) {
                usb1->fs_clean = UFS_FSBAD;
@@ -420,21 +420,18 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
                if (i + uspi->s_fpb > blks)
                        size = (blks - i) * uspi->s_fsize;
-                if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+                if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 
                        ubh = ubh_bread(sb,
                                fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_csaddr) + i, size);
-                        if (!ubh)
+                else 
-                                goto failed;
-                        ubh_ubhcpymem (space, ubh, size);
-                        sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
-                }
-                else {
                        ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
-                        if (!ubh)
+                
-                                goto failed;
+                if (!ubh)
-                        ubh_ubhcpymem(space, ubh, size);
+                        goto failed;
-                        sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
-                }
+                ubh_ubhcpymem (space, ubh, size);
+                sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
                space += size;
                ubh_brelse (ubh);
                ubh = NULL;
@@ -539,6 +536,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        struct inode *inode;
        unsigned block_size, super_block_size;
        unsigned flags;
+        unsigned super_block_offset;
        uspi = NULL;
        ubh = NULL;
@@ -586,10 +584,11 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        if (!uspi)
                goto failed;
+        super_block_offset=UFS_SBLOCK;
        /* Keep 2Gig file limit. Some UFS variants need to override 
           this but as I don't know which I'll let those in the know loosen
           the rules */
-           
        switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
        case UFS_MOUNT_UFSTYPE_44BSD:
                UFSD(("ufstype=44bsd\n"))
@@ -601,7 +600,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
                break;
        case UFS_MOUNT_UFSTYPE_UFS2:
-                UFSD(("ufstype=ufs2\n"))
+                UFSD(("ufstype=ufs2\n"));
+                super_block_offset=SBLOCK_UFS2;
                uspi->s_fsize = block_size = 512;
                uspi->s_fmask = ~(512 - 1);
                uspi->s_fshift = 9;
@@ -725,19 +725,16 @@ again:
        /*
         * read ufs super block from device
         */
-        if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
-                ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + SBLOCK_UFS2/block_size, super_block_size);
+        ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + super_block_offset/block_size, super_block_size);
-        }
+        
-        else {
-                ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + UFS_SBLOCK/block_size, super_block_size);
-        }
        if (!ubh) 
            goto failed;
        
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
-        usb2 = ubh_get_usb_second(USPI_UBH);
+        usb2 = ubh_get_usb_second(uspi);
-        usb3 = ubh_get_usb_third(USPI_UBH);
+        usb3 = ubh_get_usb_third(uspi);
        usb  = (struct ufs_super_block *)
                ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
@@ -1006,8 +1003,8 @@ static void ufs_write_super (struct super_block *sb) {
        UFSD(("ENTER\n"))
        flags = UFS_SB(sb)->s_flags;
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
-        usb3 = ubh_get_usb_third(USPI_UBH);
+        usb3 = ubh_get_usb_third(uspi);
        if (!(sb->s_flags & MS_RDONLY)) {
                usb1->fs_time = cpu_to_fs32(sb, get_seconds());
@@ -1049,8 +1046,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
        
        uspi = UFS_SB(sb)->s_uspi;
        flags = UFS_SB(sb)->s_flags;
-        usb1 = ubh_get_usb_first(USPI_UBH);
+        usb1 = ubh_get_usb_first(uspi);
-        usb3 = ubh_get_usb_third(USPI_UBH);
+        usb3 = ubh_get_usb_third(uspi);
        
        /*
         * Allow the "check" option to be passed as a remount option.
@@ -1124,7 +1121,7 @@ static int ufs_statfs (struct super_block *sb, struct kstatfs *buf)
        lock_kernel();
        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first (USPI_UBH);
+        usb1 = ubh_get_usb_first (uspi);
        usb  = (struct ufs_super_block *)
                ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
        
@@ -1275,7 +1272,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
        size_t towrite = len;
        struct buffer_head *bh;
-        down(&inode->i_sem);
+        mutex_lock(&inode->i_mutex);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -1296,14 +1293,16 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
                blk++;
        }
 out:
-        if (len == towrite)
+        if (len == towrite) {
+                mutex_unlock(&inode->i_mutex);
                return err;
+        }
        if (inode->i_size < off+len-towrite)
                i_size_write(inode, off+len-towrite);
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
        mark_inode_dirty(inode);
-        up(&inode->i_sem);
+        mutex_unlock(&inode->i_mutex);
        return len - towrite;
 }
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b2640076679a..48d6d9bcc157 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -249,18 +249,28 @@ extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head
 /*
- * macros to get important structures from ufs_buffer_head
+ * macros and inline function to get important structures from ufs_sb_private_info
 */
-#define ubh_get_usb_first(ubh) \
-        ((struct ufs_super_block_first *)((ubh)->bh[0]->b_data))
-#define ubh_get_usb_second(ubh) \
+static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
-        ((struct ufs_super_block_second *)(ubh)-> \
+                                   unsigned int offset)
-        bh[UFS_SECTOR_SIZE >> uspi->s_fshift]->b_data + (UFS_SECTOR_SIZE & ~uspi->s_fmask))
+{
+        unsigned int index;
+        
+        index = offset >> uspi->s_fshift;
+        offset &= ~uspi->s_fmask;
+        return uspi->s_ubh.bh[index]->b_data + offset;
+}
+#define ubh_get_usb_first(uspi) \
+        ((struct ufs_super_block_first *)get_usb_offset((uspi), 0))
+#define ubh_get_usb_second(uspi) \
+        ((struct ufs_super_block_second *)get_usb_offset((uspi), UFS_SECTOR_SIZE))
+#define ubh_get_usb_third(uspi) \
+        ((struct ufs_super_block_third *)get_usb_offset((uspi), 2*UFS_SECTOR_SIZE))
-#define ubh_get_usb_third(ubh) \
-        ((struct ufs_super_block_third *)((ubh)-> \
-        bh[UFS_SECTOR_SIZE*2 >> uspi->s_fshift]->b_data + (UFS_SECTOR_SIZE*2 & ~uspi->s_fmask)))
 #define ubh_get_ucg(ubh) \
        ((struct ufs_cylinder_group *)((ubh)->bh[0]->b_data))
diff --git a/fs/xattr.c b/fs/xattr.c
index bcc2156d4d28..80eca7d3d69f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -19,6 +19,149 @@
 #include <linux/fsnotify.h>
 #include <asm/uaccess.h>
+/*
+ * Check permissions for extended attribute access.  This is a bit complicated
+ * because different namespaces have very different rules.
+ */
+static int
+xattr_permission(struct inode *inode, const char *name, int mask)
+{
+        /*
+         * We can never set or remove an extended attribute on a read-only
+         * filesystem or on an immutable / append-only inode.
+         */
+        if (mask & MAY_WRITE) {
+                if (IS_RDONLY(inode))
+                        return -EROFS;
+                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                        return -EPERM;
+        }
+        /*
+         * No restriction for security.* and system.* from the VFS.  Decision
+         * on these is left to the underlying filesystem / security module.
+         */
+        if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
+            !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+                return 0;
+        /*
+         * The trusted.* namespace can only be accessed by a privileged user.
+         */
+        if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+                return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
+        if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
+                if (!S_ISREG(inode->i_mode) &&
+                    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+                        return -EPERM;
+        }
+        return permission(inode, mask, NULL);
+}
+int
+vfs_setxattr(struct dentry *dentry, char *name, void *value,
+                size_t size, int flags)
+{
+        struct inode *inode = dentry->d_inode;
+        int error;
+        error = xattr_permission(inode, name, MAY_WRITE);
+        if (error)
+                return error;
+        mutex_lock(&inode->i_mutex);
+        error = security_inode_setxattr(dentry, name, value, size, flags);
+        if (error)
+                goto out;
+        error = -EOPNOTSUPP;
+        if (inode->i_op->setxattr) {
+                error = inode->i_op->setxattr(dentry, name, value, size, flags);
+                if (!error) {
+                        fsnotify_xattr(dentry);
+                        security_inode_post_setxattr(dentry, name, value,
+                                                     size, flags);
+                }
+        } else if (!strncmp(name, XATTR_SECURITY_PREFIX,
+                                XATTR_SECURITY_PREFIX_LEN)) {
+                const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+                error = security_inode_setsecurity(inode, suffix, value,
+                                                   size, flags);
+                if (!error)
+                        fsnotify_xattr(dentry);
+        }
+out:
+        mutex_unlock(&inode->i_mutex);
+        return error;
+}
+EXPORT_SYMBOL_GPL(vfs_setxattr);
+ssize_t
+vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
+{
+        struct inode *inode = dentry->d_inode;
+        int error;
+        error = xattr_permission(inode, name, MAY_READ);
+        if (error)
+                return error;
+        error = security_inode_getxattr(dentry, name);
+        if (error)
+                return error;
+        if (inode->i_op->getxattr)
+                error = inode->i_op->getxattr(dentry, name, value, size);
+        else
+                error = -EOPNOTSUPP;
+        if (!strncmp(name, XATTR_SECURITY_PREFIX,
+                                XATTR_SECURITY_PREFIX_LEN)) {
+                const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+                int ret = security_inode_getsecurity(inode, suffix, value,
+                                                     size, error);
+                /*
+                 * Only overwrite the return value if a security module
+                 * is actually active.
+                 */
+                if (ret != -EOPNOTSUPP)
+                        error = ret;
+        }
+        return error;
+}
+EXPORT_SYMBOL_GPL(vfs_getxattr);
+int
+vfs_removexattr(struct dentry *dentry, char *name)
+{
+        struct inode *inode = dentry->d_inode;
+        int error;
+        if (!inode->i_op->removexattr)
+                return -EOPNOTSUPP;
+        error = xattr_permission(inode, name, MAY_WRITE);
+        if (error)
+                return error;
+        error = security_inode_removexattr(dentry, name);
+        if (error)
+                return error;
+        mutex_lock(&inode->i_mutex);
+        error = inode->i_op->removexattr(dentry, name);
+        mutex_unlock(&inode->i_mutex);
+        if (!error)
+                fsnotify_xattr(dentry);
+        return error;
+}
+EXPORT_SYMBOL_GPL(vfs_removexattr);
 /*
 * Extended attribute SET operations
 */
@@ -51,29 +194,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
                }
        }
-        down(&d->d_inode->i_sem);
+        error = vfs_setxattr(d, kname, kvalue, size, flags);
-        error = security_inode_setxattr(d, kname, kvalue, size, flags);
-        if (error)
-                goto out;
-        error = -EOPNOTSUPP;
-        if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
-                error = d->d_inode->i_op->setxattr(d, kname, kvalue,
-                                                   size, flags);
-                if (!error) {
-                        fsnotify_xattr(d);
-                        security_inode_post_setxattr(d, kname, kvalue,
-                                                     size, flags);
-                }
-        } else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
-                            sizeof XATTR_SECURITY_PREFIX - 1)) {
-                const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
-                error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
-                                                   size, flags);
-                if (!error)
-                        fsnotify_xattr(d);
-        }
-out:
-        up(&d->d_inode->i_sem);
        kfree(kvalue);
        return error;
 }
@@ -147,22 +268,7 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
                        return -ENOMEM;
        }
-        error = security_inode_getxattr(d, kname);
+        error = vfs_getxattr(d, kname, kvalue, size);
-        if (error)
-                goto out;
-        error = -EOPNOTSUPP;
-        if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
-                error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
-        if (!strncmp(kname, XATTR_SECURITY_PREFIX,
-                     sizeof XATTR_SECURITY_PREFIX - 1)) {
-                const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
-                int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue,
-                                                    size, error);
-                /* Security module active: overwrite error value */
-                if (rv != -EOPNOTSUPP)
-                        error = rv;
-        }
        if (error > 0) {
                if (size && copy_to_user(value, kvalue, error))
                        error = -EFAULT;
@@ -171,7 +277,6 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
                   than XATTR_SIZE_MAX bytes. Not possible. */
                error = -E2BIG;
        }
-out:
        kfree(kvalue);
        return error;
 }
@@ -318,19 +423,7 @@ removexattr(struct dentry *d, char __user *name)
        if (error < 0)
                return error;
-        error = -EOPNOTSUPP;
+        return vfs_removexattr(d, kname);
-        if (d->d_inode->i_op && d->d_inode->i_op->removexattr) {
-                error = security_inode_removexattr(d, kname);
-                if (error)
-                        goto out;
-                down(&d->d_inode->i_sem);
-                error = d->d_inode->i_op->removexattr(d, kname);
-                up(&d->d_inode->i_sem);
-                if (!error)
-                        fsnotify_xattr(d);
-        }
-out:
-        return error;
 }
 asmlinkage long
diff --git a/fs/xfs/Kbuild b/fs/xfs/Kbuild
new file mode 100644
index 000000000000..2566e96706f1
--- /dev/null
+++ b/fs/xfs/Kbuild
@@ -0,0 +1,6 @@
+#
+# The xfs people like to share Makefile with 2.6 and 2.4.
+# Utilise a file named Kbuild, which takes precedence over Makefile.
+#
+include $(srctree)/$(obj)/Makefile-linux-2.6
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
index ce773d89a923..2a88d56c4dc2 100644
--- a/fs/xfs/linux-2.6/mutex.h
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -18,22 +18,8 @@
 #ifndef __XFS_SUPPORT_MUTEX_H__
 #define __XFS_SUPPORT_MUTEX_H__
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
-#include <asm/semaphore.h>
-/*
+typedef struct mutex mutex_t;
- * Map the mutex'es from IRIX to Linux semaphores.
- *
- * Destroy just simply initializes to -99 which should block all other
- * callers.
- */
-#define MUTEX_DEFAULT           0x0
-typedef struct semaphore        mutex_t;
-#define mutex_init(lock, type, name)            sema_init(lock, 1)
-#define mutex_destroy(lock)                     sema_init(lock, -99)
-#define mutex_lock(lock, num)                   down(lock)
-#define mutex_trylock(lock)                     (down_trylock(lock) ? 0 : 1)
-#define mutex_unlock(lock)                      up(lock)
 #endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..120626789406 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include <linux/mpage.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
-                struct writeback_control *wbc, void *, int, int);
 #if defined(XFS_RW_TRACE)
 void
@@ -55,17 +54,15 @@ xfs_page_trace(
        int             mask)
 {
        xfs_inode_t     *ip;
-        bhv_desc_t      *bdp;
        vnode_t         *vp = LINVFS_GET_VP(inode);
        loff_t          isize = i_size_read(inode);
-        loff_t          offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+        loff_t          offset = page_offset(page);
        int             delalloc = -1, unmapped = -1, unwritten = -1;
        if (page_has_buffers(page))
                xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-        bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+        ip = xfs_vtoi(vp);
-        ip = XFS_BHVTOI(bdp);
        if (!ip->i_rwtrace)
                return;
@@ -103,15 +100,56 @@ xfs_finish_ioend(
                queue_work(xfsdatad_workqueue, &ioend->io_work);
 }
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
 STATIC void
 xfs_destroy_ioend(
        xfs_ioend_t             *ioend)
 {
+        struct buffer_head      *bh, *next;
+        for (bh = ioend->io_buffer_head; bh; bh = next) {
+                next = bh->b_private;
+                bh->b_end_io(bh, ioend->io_uptodate);
+        }
        vn_iowake(ioend->io_vnode);
        mempool_free(ioend, xfs_ioend_pool);
 }
 /*
+ * Buffered IO write completion for delayed allocate extents.
+ * TODO: Update ondisk isize now that we know the file data
+ * has been flushed (i.e. the notorious "NULL file" problem).
+ */
+STATIC void
+xfs_end_bio_delalloc(
+        void                    *data)
+{
+        xfs_ioend_t             *ioend = data;
+        xfs_destroy_ioend(ioend);
+}
+/*
+ * Buffered IO write completion for regular, written extents.
+ */
+STATIC void
+xfs_end_bio_written(
+        void                    *data)
+{
+        xfs_ioend_t             *ioend = data;
+        xfs_destroy_ioend(ioend);
+}
+/*
+ * IO write completion for unwritten extents.
+ *
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
        vnode_t                 *vp = ioend->io_vnode;
        xfs_off_t               offset = ioend->io_offset;
        size_t                  size = ioend->io_size;
-        struct buffer_head      *bh, *next;
        int                     error;
        if (ioend->io_uptodate)
                VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-        /* ioend->io_buffer_head is only non-NULL for buffered I/O */
-        for (bh = ioend->io_buffer_head; bh; bh = next) {
-                next = bh->b_private;
-                bh->b_end_io = NULL;
-                clear_buffer_unwritten(bh);
-                end_buffer_async_write(bh, ioend->io_uptodate);
-        }
        xfs_destroy_ioend(ioend);
 }
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
 */
 STATIC xfs_ioend_t *
 xfs_alloc_ioend(
-        struct inode            *inode)
+        struct inode            *inode,
+        unsigned int            type)
 {
        xfs_ioend_t             *ioend;
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
         */
        atomic_set(&ioend->io_remaining, 1);
        ioend->io_uptodate = 1; /* cleared if any I/O fails */
+        ioend->io_list = NULL;
+        ioend->io_type = type;
        ioend->io_vnode = LINVFS_GET_VP(inode);
        ioend->io_buffer_head = NULL;
+        ioend->io_buffer_tail = NULL;
        atomic_inc(&ioend->io_vnode->v_iocount);
        ioend->io_offset = 0;
        ioend->io_size = 0;
-        INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+        if (type == IOMAP_UNWRITTEN)
+                INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+        else if (type == IOMAP_DELAY)
+                INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+        else
+                INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
        return ioend;
 }
-void
-linvfs_unwritten_done(
-        struct buffer_head      *bh,
-        int                     uptodate)
-{
-        xfs_ioend_t             *ioend = bh->b_private;
-        static spinlock_t       unwritten_done_lock = SPIN_LOCK_UNLOCKED;
-        unsigned long           flags;
-        ASSERT(buffer_unwritten(bh));
-        bh->b_end_io = NULL;
-        if (!uptodate)
-                ioend->io_uptodate = 0;
-        /*
-         * Deep magic here.  We reuse b_private in the buffer_heads to build
-         * a chain for completing the I/O from user context after we've issued
-         * a transaction to convert the unwritten extent.
-         */
-        spin_lock_irqsave(&unwritten_done_lock, flags);
-        bh->b_private = ioend->io_buffer_head;
-        ioend->io_buffer_head = bh;
-        spin_unlock_irqrestore(&unwritten_done_lock, flags);
-        xfs_finish_ioend(ioend);
-}
 STATIC int
 xfs_map_blocks(
        struct inode            *inode,
@@ -218,138 +226,283 @@ xfs_map_blocks(
        return -error;
 }
+STATIC inline int
+xfs_iomap_valid(
+        xfs_iomap_t             *iomapp,
+        loff_t                  offset)
+{
+        return offset >= iomapp->iomap_offset &&
+                offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+}
 /*
- * Finds the corresponding mapping in block @map array of the
+ * BIO completion handler for buffered IO.
- * given @offset within a @page.
 */
-STATIC xfs_iomap_t *
+STATIC int
-xfs_offset_to_map(
+xfs_end_bio(
+        struct bio              *bio,
+        unsigned int            bytes_done,
+        int                     error)
+{
+        xfs_ioend_t             *ioend = bio->bi_private;
+        if (bio->bi_size)
+                return 1;
+        ASSERT(ioend);
+        ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+        /* Toss bio and pass work off to an xfsdatad thread */
+        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+                ioend->io_uptodate = 0;
+        bio->bi_private = NULL;
+        bio->bi_end_io = NULL;
+        bio_put(bio);
+        xfs_finish_ioend(ioend);
+        return 0;
+}
+STATIC void
+xfs_submit_ioend_bio(
+        xfs_ioend_t     *ioend,
+        struct bio      *bio)
+{
+        atomic_inc(&ioend->io_remaining);
+        bio->bi_private = ioend;
+        bio->bi_end_io = xfs_end_bio;
+        submit_bio(WRITE, bio);
+        ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
+        bio_put(bio);
+}
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+        struct buffer_head      *bh)
+{
+        struct bio              *bio;
+        int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
+        do {
+                bio = bio_alloc(GFP_NOIO, nvecs);
+                nvecs >>= 1;
+        } while (!bio);
+        ASSERT(bio->bi_private == NULL);
+        bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+        bio->bi_bdev = bh->b_bdev;
+        bio_get(bio);
+        return bio;
+}
+STATIC void
+xfs_start_buffer_writeback(
+        struct buffer_head      *bh)
+{
+        ASSERT(buffer_mapped(bh));
+        ASSERT(buffer_locked(bh));
+        ASSERT(!buffer_delay(bh));
+        ASSERT(!buffer_unwritten(bh));
+        mark_buffer_async_write(bh);
+        set_buffer_uptodate(bh);
+        clear_buffer_dirty(bh);
+}
+STATIC void
+xfs_start_page_writeback(
        struct page             *page,
-        xfs_iomap_t             *iomapp,
+        struct writeback_control *wbc,
-        unsigned long           offset)
+        int                     clear_dirty,
+        int                     buffers)
+{
+        ASSERT(PageLocked(page));
+        ASSERT(!PageWriteback(page));
+        set_page_writeback(page);
+        if (clear_dirty)
+                clear_page_dirty(page);
+        unlock_page(page);
+        if (!buffers) {
+                end_page_writeback(page);
+                wbc->pages_skipped++;   /* We didn't write this page */
+        }
+}
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+        return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we go, then we can end up with a page that only has some buffers
+ * marked async write, and I/O completion can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * bufferheads, and then a second one to submit them for I/O.
+ */
+STATIC void
+xfs_submit_ioend(
+        xfs_ioend_t             *ioend)
+{
+        xfs_ioend_t             *head = ioend;
+        xfs_ioend_t             *next;
+        struct buffer_head      *bh;
+        struct bio              *bio;
+        sector_t                lastblock = 0;
+        /* Pass 1 - start writeback */
+        do {
+                next = ioend->io_list;
+                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+                        xfs_start_buffer_writeback(bh);
+                }
+        } while ((ioend = next) != NULL);
+        /* Pass 2 - submit I/O */
+        ioend = head;
+        do {
+                next = ioend->io_list;
+                bio = NULL;
+                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+                        if (!bio) {
+ retry:
+                                bio = xfs_alloc_ioend_bio(bh);
+                        } else if (bh->b_blocknr != lastblock + 1) {
+                                xfs_submit_ioend_bio(ioend, bio);
+                                goto retry;
+                        }
+                        if (bio_add_buffer(bio, bh) != bh->b_size) {
+                                xfs_submit_ioend_bio(ioend, bio);
+                                goto retry;
+                        }
+                        lastblock = bh->b_blocknr;
+                }
+                if (bio)
+                        xfs_submit_ioend_bio(ioend, bio);
+                xfs_finish_ioend(ioend);
+        } while ((ioend = next) != NULL);
+}
+/*
+ * Cancel submission of all buffer_heads so far in this ioend.
+ * Toss the ioend too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+        xfs_ioend_t             *ioend)
+{
+        xfs_ioend_t             *next;
+        struct buffer_head      *bh, *next_bh;
+        do {
+                next = ioend->io_list;
+                bh = ioend->io_buffer_head;
+                do {
+                        next_bh = bh->b_private;
+                        clear_buffer_async_write(bh);
+                        unlock_buffer(bh);
+                } while ((bh = next_bh) != NULL);
+                vn_iowake(ioend->io_vnode);
+                mempool_free(ioend, xfs_ioend_pool);
+        } while ((ioend = next) != NULL);
+}
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * The ioend the buffer was added to is handed back through @result.
+ */
+STATIC void
+xfs_add_to_ioend(
+        struct inode            *inode,
+        struct buffer_head      *bh,
+        xfs_off_t               offset,
+        unsigned int            type,
+        xfs_ioend_t             **result,
+        int                     need_ioend)
 {
-        loff_t                  full_offset;    /* offset from start of file */
+        xfs_ioend_t             *ioend = *result;
-        ASSERT(offset < PAGE_CACHE_SIZE);
+        if (!ioend || need_ioend || type != ioend->io_type) {
+                xfs_ioend_t     *previous = *result;
-        full_offset = page->index;              /* NB: using 64bit number */
+                ioend = xfs_alloc_ioend(inode, type);
-        full_offset <<= PAGE_CACHE_SHIFT;       /* offset from file start */
+                ioend->io_offset = offset;
-        full_offset += offset;                  /* offset from page start */
+                ioend->io_buffer_head = bh;
+                ioend->io_buffer_tail = bh;
+                if (previous)
+                        previous->io_list = ioend;
+                *result = ioend;
+        } else {
+                ioend->io_buffer_tail->b_private = bh;
+                ioend->io_buffer_tail = bh;
+        }
-        if (full_offset < iomapp->iomap_offset)
+        bh->b_private = NULL;
-                return NULL;
+        ioend->io_size += bh->b_size;
-        if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
-                return iomapp;
-        return NULL;
 }
 STATIC void
 xfs_map_at_offset(
-        struct page             *page,
        struct buffer_head      *bh,
-        unsigned long           offset,
+        loff_t                  offset,
        int                     block_bits,
        xfs_iomap_t             *iomapp)
 {
        xfs_daddr_t             bn;
-        loff_t                  delta;
        int                     sector_shift;
        ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
        ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
        ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
-        delta = page->index;
-        delta <<= PAGE_CACHE_SHIFT;
-        delta += offset;
-        delta -= iomapp->iomap_offset;
-        delta >>= block_bits;
        sector_shift = block_bits - BBSHIFT;
-        bn = iomapp->iomap_bn >> sector_shift;
+        bn = (iomapp->iomap_bn >> sector_shift) +
-        bn += delta;
+              ((offset - iomapp->iomap_offset) >> block_bits);
-        BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+        ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
        ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
        lock_buffer(bh);
        bh->b_blocknr = bn;
-        bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+        bh->b_bdev = iomapp->iomap_target->bt_bdev;
        set_buffer_mapped(bh);
        clear_buffer_delay(bh);
+        clear_buffer_unwritten(bh);
 }
 /*
- * Look for a page at index which is unlocked and contains our
+ * Look for a page at index that is suitable for clustering.
- * unwritten extent flagged buffers at its head.  Returns page
- * locked and with an extra reference count, and length of the
- * unwritten extent component on this page that we can write,
- * in units of filesystem blocks.
- */
-STATIC struct page *
-xfs_probe_unwritten_page(
-        struct address_space    *mapping,
-        pgoff_t                 index,
-        xfs_iomap_t             *iomapp,
-        xfs_ioend_t             *ioend,
-        unsigned long           max_offset,
-        unsigned long           *fsbs,
-        unsigned int            bbits)
-{
-        struct page             *page;
-        page = find_trylock_page(mapping, index);
-        if (!page)
-                return NULL;
-        if (PageWriteback(page))
-                goto out;
-        if (page->mapping && page_has_buffers(page)) {
-                struct buffer_head      *bh, *head;
-                unsigned long           p_offset = 0;
-                *fsbs = 0;
-                bh = head = page_buffers(page);
-                do {
-                        if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
-                                break;
-                        if (!xfs_offset_to_map(page, iomapp, p_offset))
-                                break;
-                        if (p_offset >= max_offset)
-                                break;
-                        xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
-                        set_buffer_unwritten_io(bh);
-                        bh->b_private = ioend;
-                        p_offset += bh->b_size;
-                        (*fsbs)++;
-                } while ((bh = bh->b_this_page) != head);
-                if (p_offset)
-                        return page;
-        }
-out:
-        unlock_page(page);
-        return NULL;
-}
-/*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
 */
 STATIC unsigned int
-xfs_probe_unmapped_page(
+xfs_probe_page(
-        struct address_space    *mapping,
+        struct page             *page,
-        pgoff_t                 index,
+        unsigned int            pg_offset,
-        unsigned int            pg_offset)
+        int                     mapped)
 {
-        struct page             *page;
        int                     ret = 0;
-        page = find_trylock_page(mapping, index);
-        if (!page)
-                return 0;
        if (PageWriteback(page))
-                goto out;
+                return 0;
        if (page->mapping && PageDirty(page)) {
                if (page_has_buffers(page)) {
@@ -357,79 +510,101 @@ xfs_probe_unmapped_page(
                        bh = head = page_buffers(page);
                        do {
-                                if (buffer_mapped(bh) || !buffer_uptodate(bh))
+                                if (!buffer_uptodate(bh))
+                                        break;
+                                if (mapped != buffer_mapped(bh))
                                        break;
                                ret += bh->b_size;
                                if (ret >= pg_offset)
                                        break;
                        } while ((bh = bh->b_this_page) != head);
                } else
-                        ret = PAGE_CACHE_SIZE;
+                        ret = mapped ? 0 : PAGE_CACHE_SIZE;
        }
-out:
-        unlock_page(page);
        return ret;
 }
-STATIC unsigned int
+STATIC size_t
-xfs_probe_unmapped_cluster(
+xfs_probe_cluster(
        struct inode            *inode,
        struct page             *startpage,
        struct buffer_head      *bh,
-        struct buffer_head      *head)
+        struct buffer_head      *head,
+        int                     mapped)
 {
+        struct pagevec          pvec;
        pgoff_t                 tindex, tlast, tloff;
-        unsigned int            pg_offset, len, total = 0;
+        size_t                  total = 0;
-        struct address_space    *mapping = inode->i_mapping;
+        int                     done = 0, i;
        /* First sum forwards in this page */
        do {
-                if (buffer_mapped(bh))
+                if (mapped != buffer_mapped(bh))
-                        break;
+                        return total;
                total += bh->b_size;
        } while ((bh = bh->b_this_page) != head);
-        /* If we reached the end of the page, sum forwards in
+        /* if we reached the end of the page, sum forwards in following pages */
-         * following pages.
+        tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-         */
+        tindex = startpage->index + 1;
-        if (bh == head) {
-                tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+        /* Prune this back to avoid pathological behavior */
-                /* Prune this back to avoid pathological behavior */
+        tloff = min(tlast, startpage->index + 64);
-                tloff = min(tlast, startpage->index + 64);
-                for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
+        pagevec_init(&pvec, 0);
-                        len = xfs_probe_unmapped_page(mapping, tindex,
+        while (!done && tindex <= tloff) {
-                                                        PAGE_CACHE_SIZE);
+                unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-                        if (!len)
-                                return total;
+                if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                        break;
+                for (i = 0; i < pagevec_count(&pvec); i++) {
+                        struct page *page = pvec.pages[i];
+                        size_t pg_offset, len = 0;
+                        if (tindex == tlast) {
+                                pg_offset =
+                                    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+                                if (!pg_offset) {
+                                        done = 1;
+                                        break;
+                                }
+                        } else
+                                pg_offset = PAGE_CACHE_SIZE;
+                        if (page->index == tindex && !TestSetPageLocked(page)) {
+                                len = xfs_probe_page(page, pg_offset, mapped);
+                                unlock_page(page);
+                        }
+                        if (!len) {
+                                done = 1;
+                                break;
+                        }
                        total += len;
+                        tindex++;
                }
-                if (tindex == tlast &&
-                    (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                pagevec_release(&pvec);
-                        total += xfs_probe_unmapped_page(mapping,
+                cond_resched();
-                                                        tindex, pg_offset);
-                }
        }
        return total;
 }
 /*
- * Probe for a given page (index) in the inode and test if it is delayed
+ * Test if a given page is suitable for writing as part of an unwritten
- * and without unwritten buffers.  Returns page locked and with an extra
+ * or delayed allocate extent.
- * reference count.
 */
-STATIC struct page *
+STATIC int
-xfs_probe_delalloc_page(
+xfs_is_delayed_page(
-        struct inode            *inode,
+        struct page             *page,
-        pgoff_t                 index)
+        unsigned int            type)
 {
-        struct page             *page;
-        page = find_trylock_page(inode->i_mapping, index);
-        if (!page)
-                return NULL;
        if (PageWriteback(page))
-                goto out;
+                return 0;
        if (page->mapping && page_has_buffers(page)) {
                struct buffer_head      *bh, *head;
@@ -437,243 +612,156 @@ xfs_probe_delalloc_page(
                bh = head = page_buffers(page);
                do {
-                        if (buffer_unwritten(bh)) {
+                        if (buffer_unwritten(bh))
-                                acceptable = 0;
+                                acceptable = (type == IOMAP_UNWRITTEN);
+                        else if (buffer_delay(bh))
+                                acceptable = (type == IOMAP_DELAY);
+                        else if (buffer_mapped(bh))
+                                acceptable = (type == 0);
+                        else
                                break;
-                        } else if (buffer_delay(bh)) {
-                                acceptable = 1;
-                        }
                } while ((bh = bh->b_this_page) != head);
                if (acceptable)
-                        return page;
+                        return 1;
-        }
-out:
-        unlock_page(page);
-        return NULL;
-}
-STATIC int
-xfs_map_unwritten(
-        struct inode            *inode,
-        struct page             *start_page,
-        struct buffer_head      *head,
-        struct buffer_head      *curr,
-        unsigned long           p_offset,
-        int                     block_bits,
-        xfs_iomap_t             *iomapp,
-        struct writeback_control *wbc,
-        int                     startio,
-        int                     all_bh)
-{
-        struct buffer_head      *bh = curr;
-        xfs_iomap_t             *tmp;
-        xfs_ioend_t             *ioend;
-        loff_t                  offset;
-        unsigned long           nblocks = 0;
-        offset = start_page->index;
-        offset <<= PAGE_CACHE_SHIFT;
-        offset += p_offset;
-        ioend = xfs_alloc_ioend(inode);
-        /* First map forwards in the page consecutive buffers
-         * covering this unwritten extent
-         */
-        do {
-                if (!buffer_unwritten(bh))
-                        break;
-                tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
-                if (!tmp)
-                        break;
-                xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
-                set_buffer_unwritten_io(bh);
-                bh->b_private = ioend;
-                p_offset += bh->b_size;
-                nblocks++;
-        } while ((bh = bh->b_this_page) != head);
-        atomic_add(nblocks, &ioend->io_remaining);
-        /* If we reached the end of the page, map forwards in any
-         * following pages which are also covered by this extent.
-         */
-        if (bh == head) {
-                struct address_space    *mapping = inode->i_mapping;
-                pgoff_t                 tindex, tloff, tlast;
-                unsigned long           bs;
-                unsigned int            pg_offset, bbits = inode->i_blkbits;
-                struct page             *page;
-                tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-                tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-                tloff = min(tlast, tloff);
-                for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
-                        page = xfs_probe_unwritten_page(mapping,
-                                                tindex, iomapp, ioend,
-                                                PAGE_CACHE_SIZE, &bs, bbits);
-                        if (!page)
-                                break;
-                        nblocks += bs;
-                        atomic_add(bs, &ioend->io_remaining);
-                        xfs_convert_page(inode, page, iomapp, wbc, ioend,
-                                                        startio, all_bh);
-                        /* stop if converting the next page might add
-                         * enough blocks that the corresponding byte
-                         * count won't fit in our ulong page buf length */
-                        if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-                                goto enough;
-                }
-                if (tindex == tlast &&
-                    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
-                        page = xfs_probe_unwritten_page(mapping,
-                                                        tindex, iomapp, ioend,
-                                                        pg_offset, &bs, bbits);
-                        if (page) {
-                                nblocks += bs;
-                                atomic_add(bs, &ioend->io_remaining);
-                                xfs_convert_page(inode, page, iomapp, wbc, ioend,
-                                                        startio, all_bh);
-                                if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-                                        goto enough;
-                        }
-                }
        }
-enough:
-        ioend->io_size = (xfs_off_t)nblocks << block_bits;
-        ioend->io_offset = offset;
-        xfs_finish_ioend(ioend);
        return 0;
 }
-STATIC void
-xfs_submit_page(
-        struct page             *page,
-        struct writeback_control *wbc,
-        struct buffer_head      *bh_arr[],
-        int                     bh_count,
-        int                     probed_page,
-        int                     clear_dirty)
-{
-        struct buffer_head      *bh;
-        int                     i;
-        BUG_ON(PageWriteback(page));
-        if (bh_count)
-                set_page_writeback(page);
-        if (clear_dirty)
-                clear_page_dirty(page);
-        unlock_page(page);
-        if (bh_count) {
-                for (i = 0; i < bh_count; i++) {
-                        bh = bh_arr[i];
-                        mark_buffer_async_write(bh);
-                        if (buffer_unwritten(bh))
-                                set_buffer_unwritten_io(bh);
-                        set_buffer_uptodate(bh);
-                        clear_buffer_dirty(bh);
-                }
-                for (i = 0; i < bh_count; i++)
-                        submit_bh(WRITE, bh_arr[i]);
-                if (probed_page && clear_dirty)
-                        wbc->nr_to_write--;     /* Wrote an "extra" page */
-        }
-}
 /*
 * Allocate & map buffers for page given the extent map. Write it out.
 * except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only, for the original page it is possible
 * that the page has no mapping at all.
 */
-STATIC void
+STATIC int
 xfs_convert_page(
        struct inode            *inode,
        struct page             *page,
-        xfs_iomap_t             *iomapp,
+        loff_t                  tindex,
+        xfs_iomap_t             *mp,
+        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
-        void                    *private,
        int                     startio,
        int                     all_bh)
 {
-        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+        struct buffer_head      *bh, *head;
-        xfs_iomap_t             *mp = iomapp, *tmp;
+        xfs_off_t               end_offset;
-        unsigned long           offset, end_offset;
+        unsigned long           p_offset;
-        int                     index = 0;
+        unsigned int            type;
        int                     bbits = inode->i_blkbits;
        int                     len, page_dirty;
+        int                     count = 0, done = 0, uptodate = 1;
+        xfs_off_t               offset = page_offset(page);
-        end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+        if (page->index != tindex)
+                goto fail;
+        if (TestSetPageLocked(page))
+                goto fail;
+        if (PageWriteback(page))
+                goto fail_unlock_page;
+        if (page->mapping != inode->i_mapping)
+                goto fail_unlock_page;
+        if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+                goto fail_unlock_page;
        /*
         * page_dirty is initially a count of buffers on the page before
         * EOF and is decrememted as we move each into a cleanable state.
+         *
+         * Derivation:
+         *
+         * End offset is the highest offset that this page should represent.
+         * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+         * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+         * hence give us the correct page_dirty count. On any other page,
+         * it will be zero and in that case we need page_dirty to be the
+         * count of buffers on the page.
         */
+        end_offset = min_t(unsigned long long,
+                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                        i_size_read(inode));
        len = 1 << inode->i_blkbits;
-        end_offset = max(end_offset, PAGE_CACHE_SIZE);
+        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-        end_offset = roundup(end_offset, len);
+                                        PAGE_CACHE_SIZE);
-        page_dirty = end_offset / len;
+        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+        page_dirty = p_offset / len;
-        offset = 0;
        bh = head = page_buffers(page);
        do {
                if (offset >= end_offset)
                        break;
-                if (!(PageUptodate(page) || buffer_uptodate(bh)))
+                if (!buffer_uptodate(bh))
+                        uptodate = 0;
+                if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+                        done = 1;
                        continue;
-                if (buffer_mapped(bh) && all_bh &&
+                }
-                    !(buffer_unwritten(bh) || buffer_delay(bh))) {
+                if (buffer_unwritten(bh) || buffer_delay(bh)) {
+                        if (buffer_unwritten(bh))
+                                type = IOMAP_UNWRITTEN;
+                        else
+                                type = IOMAP_DELAY;
+                        if (!xfs_iomap_valid(mp, offset)) {
+                                done = 1;
+                                continue;
+                        }
+                        ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+                        ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+                        xfs_map_at_offset(bh, offset, bbits, mp);
                        if (startio) {
+                                xfs_add_to_ioend(inode, bh, offset,
+                                                type, ioendp, done);
+                        } else {
+                                set_buffer_dirty(bh);
+                                unlock_buffer(bh);
+                                mark_buffer_dirty(bh);
+                        }
+                        page_dirty--;
+                        count++;
+                } else {
+                        type = 0;
+                        if (buffer_mapped(bh) && all_bh && startio) {
                                lock_buffer(bh);
-                                bh_arr[index++] = bh;
+                                xfs_add_to_ioend(inode, bh, offset,
+                                                type, ioendp, done);
+                                count++;
                                page_dirty--;
+                        } else {
+                                done = 1;
                        }
-                        continue;
                }
-                tmp = xfs_offset_to_map(page, mp, offset);
+        } while (offset += len, (bh = bh->b_this_page) != head);
-                if (!tmp)
-                        continue;
-                ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
-                ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
-                /* If this is a new unwritten extent buffer (i.e. one
+        if (uptodate && bh == head)
-                 * that we haven't passed in private data for, we must
+                SetPageUptodate(page);
-                 * now map this buffer too.
-                 */
+        if (startio) {
-                if (buffer_unwritten(bh) && !bh->b_end_io) {
+                if (count) {
-                        ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
+                        struct backing_dev_info *bdi;
-                        xfs_map_unwritten(inode, page, head, bh, offset,
-                                        bbits, tmp, wbc, startio, all_bh);
+                        bdi = inode->i_mapping->backing_dev_info;
-                } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
+                        if (bdi_write_congested(bdi)) {
-                        xfs_map_at_offset(page, bh, offset, bbits, tmp);
+                                wbc->encountered_congestion = 1;
-                        if (buffer_unwritten(bh)) {
+                                done = 1;
-                                set_buffer_unwritten_io(bh);
+                        } else if (--wbc->nr_to_write <= 0) {
-                                bh->b_private = private;
+                                done = 1;
-                                ASSERT(private);
                        }
                }
-                if (startio) {
+                xfs_start_page_writeback(page, wbc, !page_dirty, count);
-                        bh_arr[index++] = bh;
-                } else {
-                        set_buffer_dirty(bh);
-                        unlock_buffer(bh);
-                        mark_buffer_dirty(bh);
-                }
-                page_dirty--;
-        } while (offset += len, (bh = bh->b_this_page) != head);
-        if (startio && index) {
-                xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
-        } else {
-                unlock_page(page);
        }
+        return done;
+ fail_unlock_page:
+        unlock_page(page);
+ fail:
+        return 1;
 }
 /*
@@ -685,19 +773,31 @@ xfs_cluster_write(
        struct inode            *inode,
        pgoff_t                 tindex,
        xfs_iomap_t             *iomapp,
+        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
        int                     startio,
        int                     all_bh,
        pgoff_t                 tlast)
 {
-        struct page             *page;
+        struct pagevec          pvec;
+        int                     done = 0, i;
+        pagevec_init(&pvec, 0);
+        while (!done && tindex <= tlast) {
+                unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-        for (; tindex <= tlast; tindex++) {
+                if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                page = xfs_probe_delalloc_page(inode, tindex);
-                if (!page)
                        break;
-                xfs_convert_page(inode, page, iomapp, wbc, NULL,
-                                startio, all_bh);
+                for (i = 0; i < pagevec_count(&pvec); i++) {
+                        done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                        iomapp, ioendp, wbc, startio, all_bh);
+                        if (done)
+                                break;
+                }
+                pagevec_release(&pvec);
+                cond_resched();
        }
 }
@@ -728,18 +828,22 @@ xfs_page_state_convert(
        int             startio,
        int             unmapped) /* also implies page uptodate */
 {
-        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+        struct buffer_head      *bh, *head;
-        xfs_iomap_t             *iomp, iomap;
+        xfs_iomap_t             iomap;
+        xfs_ioend_t             *ioend = NULL, *iohead = NULL;
        loff_t                  offset;
        unsigned long           p_offset = 0;
+        unsigned int            type;
        __uint64_t              end_offset;
        pgoff_t                 end_index, last_index, tlast;
-        int                     len, err, i, cnt = 0, uptodate = 1;
+        ssize_t                 size, len;
-        int                     flags;
+        int                     flags, err, iomap_valid = 0, uptodate = 1;
-        int                     page_dirty;
+        int                     page_dirty, count = 0, trylock_flag = 0;
+        int                     all_bh = unmapped;
        /* wait for other IO threads? */
-        flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+        if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
+                trylock_flag |= BMAPI_TRYLOCK;
        /* Is this page beyond the end of the file? */
        offset = i_size_read(inode);
@@ -754,161 +858,173 @@ xfs_page_state_convert(
                }
        }
-        end_offset = min_t(unsigned long long,
-                        (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
-        offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        /*
         * page_dirty is initially a count of buffers on the page before
         * EOF and is decrememted as we move each into a cleanable state.
-         */
+         *
+         * Derivation:
+         *
+         * End offset is the highest offset that this page should represent.
+         * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+         * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+         * hence give us the correct page_dirty count. On any other page,
+         * it will be zero and in that case we need page_dirty to be the
+         * count of buffers on the page.
+         */
+        end_offset = min_t(unsigned long long,
+                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
        len = 1 << inode->i_blkbits;
-        p_offset = max(p_offset, PAGE_CACHE_SIZE);
+        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-        p_offset = roundup(p_offset, len);
+                                        PAGE_CACHE_SIZE);
+        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
        page_dirty = p_offset / len;
-        iomp = NULL;
-        p_offset = 0;
        bh = head = page_buffers(page);
+        offset = page_offset(page);
+        flags = -1;
+        type = 0;
+        /* TODO: cleanup count and page_dirty */
        do {
                if (offset >= end_offset)
                        break;
                if (!buffer_uptodate(bh))
                        uptodate = 0;
-                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+                        /*
+                         * The iomap is actually still valid, but the ioend
+                         * isn't.  This shouldn't happen too often.
+                         */
+                        iomap_valid = 0;
                        continue;
-                if (iomp) {
-                        iomp = xfs_offset_to_map(page, &iomap, p_offset);
                }
+                if (iomap_valid)
+                        iomap_valid = xfs_iomap_valid(&iomap, offset);
                /*
                 * First case, map an unwritten extent and prepare for
                 * extent state conversion transaction on completion.
-                 */
+                 *
-                if (buffer_unwritten(bh)) {
+                 * Second case, allocate space for a delalloc buffer.
-                        if (!startio)
+                 * We can return EAGAIN here in the release page case.
-                                continue;
+                 *
-                        if (!iomp) {
+                 * Third case, an unmapped buffer was found, and we are
-                                err = xfs_map_blocks(inode, offset, len, &iomap,
+                 * in a path where we need to write the whole page out.
-                                                BMAPI_WRITE|BMAPI_IGNSTATE);
+                 */
-                                if (err) {
+                if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                                        goto error;
+                    ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                                }
+                     !buffer_mapped(bh) && (unmapped || startio))) {
-                                iomp = xfs_offset_to_map(page, &iomap,
+                        /*
-                                                                p_offset);
+                         * Make sure we don't use a read-only iomap
+                         */
+                        if (flags == BMAPI_READ)
+                                iomap_valid = 0;
+                        if (buffer_unwritten(bh)) {
+                                type = IOMAP_UNWRITTEN;
+                                flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+                        } else if (buffer_delay(bh)) {
+                                type = IOMAP_DELAY;
+                                flags = BMAPI_ALLOCATE;
+                                if (!startio)
+                                        flags |= trylock_flag;
+                        } else {
+                                type = IOMAP_NEW;
+                                flags = BMAPI_WRITE|BMAPI_MMAP;
                        }
-                        if (iomp) {
-                                if (!bh->b_end_io) {
+                        if (!iomap_valid) {
-                                        err = xfs_map_unwritten(inode, page,
+                                if (type == IOMAP_NEW) {
-                                                        head, bh, p_offset,
+                                        size = xfs_probe_cluster(inode,
-                                                        inode->i_blkbits, iomp,
+                                                        page, bh, head, 0);
-                                                        wbc, startio, unmapped);
-                                        if (err) {
-                                                goto error;
-                                        }
                                } else {
-                                        set_bit(BH_Lock, &bh->b_state);
+                                        size = len;
                                }
-                                BUG_ON(!buffer_locked(bh));
-                                bh_arr[cnt++] = bh;
+                                err = xfs_map_blocks(inode, offset, size,
-                                page_dirty--;
+                                                &iomap, flags);
-                        }
+                                if (err)
-                /*
-                 * Second case, allocate space for a delalloc buffer.
-                 * We can return EAGAIN here in the release page case.
-                 */
-                } else if (buffer_delay(bh)) {
-                        if (!iomp) {
-                                err = xfs_map_blocks(inode, offset, len, &iomap,
-                                                BMAPI_ALLOCATE | flags);
-                                if (err) {
                                        goto error;
-                                }
+                                iomap_valid = xfs_iomap_valid(&iomap, offset);
-                                iomp = xfs_offset_to_map(page, &iomap,
-                                                                p_offset);
                        }
-                        if (iomp) {
+                        if (iomap_valid) {
-                                xfs_map_at_offset(page, bh, p_offset,
+                                xfs_map_at_offset(bh, offset,
-                                                inode->i_blkbits, iomp);
+                                                inode->i_blkbits, &iomap);
                                if (startio) {
-                                        bh_arr[cnt++] = bh;
+                                        xfs_add_to_ioend(inode, bh, offset,
+                                                        type, &ioend,
+                                                        !iomap_valid);
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
                                        mark_buffer_dirty(bh);
                                }
                                page_dirty--;
+                                count++;
+                        }
+                } else if (buffer_uptodate(bh) && startio) {
+                        /*
+                         * we got here because the buffer is already mapped.
+                         * That means it must already have extents allocated
+                         * underneath it. Map the extent by reading it.
+                         */
+                        if (!iomap_valid || type != 0) {
+                                flags = BMAPI_READ;
+                                size = xfs_probe_cluster(inode, page, bh,
+                                                                head, 1);
+                                err = xfs_map_blocks(inode, offset, size,
+                                                &iomap, flags);
+                                if (err)
+                                        goto error;
+                                iomap_valid = xfs_iomap_valid(&iomap, offset);
                        }
-                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                           (unmapped || startio)) {
-                        if (!buffer_mapped(bh)) {
+                        type = 0;
-                                int     size;
+                        if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+                                ASSERT(buffer_mapped(bh));
-                                /*
+                                if (iomap_valid)
-                                 * Getting here implies an unmapped buffer
+                                        all_bh = 1;
-                                 * was found, and we are in a path where we
+                                xfs_add_to_ioend(inode, bh, offset, type,
-                                 * need to write the whole page out.
+                                                &ioend, !iomap_valid);
-                                 */
+                                page_dirty--;
-                                if (!iomp) {
+                                count++;
-                                        size = xfs_probe_unmapped_cluster(
+                        } else {
-                                                        inode, page, bh, head);
+                                iomap_valid = 0;
-                                        err = xfs_map_blocks(inode, offset,
-                                                        size, &iomap,
-                                                        BMAPI_WRITE|BMAPI_MMAP);
-                                        if (err) {
-                                                goto error;
-                                        }
-                                        iomp = xfs_offset_to_map(page, &iomap,
-                                                                     p_offset);
-                                }
-                                if (iomp) {
-                                        xfs_map_at_offset(page,
-                                                        bh, p_offset,
-                                                        inode->i_blkbits, iomp);
-                                        if (startio) {
-                                                bh_arr[cnt++] = bh;
-                                        } else {
-                                                set_buffer_dirty(bh);
-                                                unlock_buffer(bh);
-                                                mark_buffer_dirty(bh);
-                                        }
-                                        page_dirty--;
-                                }
-                        } else if (startio) {
-                                if (buffer_uptodate(bh) &&
-                                    !test_and_set_bit(BH_Lock, &bh->b_state)) {
-                                        bh_arr[cnt++] = bh;
-                                        page_dirty--;
-                                }
                        }
+                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+                           (unmapped || startio)) {
+                        iomap_valid = 0;
                }
-        } while (offset += len, p_offset += len,
-                ((bh = bh->b_this_page) != head));
+                if (!iohead)
+                        iohead = ioend;
+        } while (offset += len, ((bh = bh->b_this_page) != head));
        if (uptodate && bh == head)
                SetPageUptodate(page);
-        if (startio) {
+        if (startio)
-                xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
+                xfs_start_page_writeback(page, wbc, 1, count);
-        }
-        if (iomp) {
+        if (ioend && iomap_valid) {
-                offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+                offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
                                        PAGE_CACHE_SHIFT;
                tlast = min_t(pgoff_t, offset, last_index);
-                xfs_cluster_write(inode, page->index + 1, iomp, wbc,
+                xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
-                                        startio, unmapped, tlast);
+                                        wbc, startio, all_bh, tlast);
        }
+        if (iohead)
+                xfs_submit_ioend(iohead);
        return page_dirty;
 error:
-        for (i = 0; i < cnt; i++) {
+        if (iohead)
-                unlock_buffer(bh_arr[i]);
+                xfs_cancel_ioend(iohead);
-        }
        /*
         * If it's delalloc and we have nowhere to put it,
@@ -916,9 +1032,8 @@ error:
         * us to try again.
         */
        if (err != -EAGAIN) {
-                if (!unmapped) {
+                if (!unmapped)
                        block_invalidatepage(page, 0);
-                }
                ClearPageUptodate(page);
        }
        return err;
@@ -982,7 +1097,7 @@ __linvfs_get_block(
        }
        /* If this is a realtime file, data might be on a new device */
-        bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+        bh_result->b_bdev = iomap.iomap_target->bt_bdev;
        /* If we previously allocated a block out beyond eof and
         * we are now coming back to use it then we will need to
@@ -1094,10 +1209,10 @@ linvfs_direct_IO(
        if (error)
                return -error;
-        iocb->private = xfs_alloc_ioend(inode);
+        iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
        ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-                iomap.iomap_target->pbr_bdev,
+                iomap.iomap_target->bt_bdev,
                iov, offset, nr_segs,
                linvfs_get_blocks_direct,
                linvfs_end_io_direct);
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4720758a9ade..55339dd5a30d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
 typedef void (*xfs_ioend_func_t)(void *);
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
 typedef struct xfs_ioend {
+        struct xfs_ioend        *io_list;       /* next ioend in chain */
+        unsigned int            io_type;        /* delalloc / unwritten */
        unsigned int            io_uptodate;    /* I/O status register */
        atomic_t                io_remaining;   /* hold count */
        struct vnode            *io_vnode;      /* file being written to */
        struct buffer_head      *io_buffer_head;/* buffer linked list head */
+        struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
        size_t                  io_size;        /* size of the extent */
        xfs_off_t               io_offset;      /* offset in the file */
        struct work_struct      io_work;        /* xfsdatad work queue */
 } xfs_ioend_t;
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..e44b7c1a3a36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
 #include <linux/kthread.h>
 #include "xfs_linux.h"
-STATIC kmem_cache_t *pagebuf_zone;
+STATIC kmem_zone_t *xfs_buf_zone;
-STATIC kmem_shaker_t pagebuf_shake;
+STATIC kmem_shaker_t xfs_buf_shake;
+STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 STATIC struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
 void
-pagebuf_trace(
+xfs_buf_trace(
-        xfs_buf_t       *pb,
+        xfs_buf_t       *bp,
        char            *id,
        void            *data,
        void            *ra)
 {
-        ktrace_enter(pagebuf_trace_buf,
+        ktrace_enter(xfs_buf_trace_buf,
-                pb, id,
+                bp, id,
-                (void *)(unsigned long)pb->pb_flags,
+                (void *)(unsigned long)bp->b_flags,
-                (void *)(unsigned long)pb->pb_hold.counter,
+                (void *)(unsigned long)bp->b_hold.counter,
-                (void *)(unsigned long)pb->pb_sema.count.counter,
+                (void *)(unsigned long)bp->b_sema.count.counter,
                (void *)current,
                data, ra,
-                (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
+                (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
-                (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
+                (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
-                (void *)(unsigned long)pb->pb_buffer_length,
+                (void *)(unsigned long)bp->b_buffer_length,
                NULL, NULL, NULL, NULL, NULL);
 }
-ktrace_t *pagebuf_trace_buf;
+ktrace_t *xfs_buf_trace_buf;
-#define PAGEBUF_TRACE_SIZE      4096
+#define XFS_BUF_TRACE_SIZE      4096
-#define PB_TRACE(pb, id, data)  \
+#define XB_TRACE(bp, id, data)  \
-        pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+        xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
 #else
-#define PB_TRACE(pb, id, data)  do { } while (0)
+#define XB_TRACE(bp, id, data)  do { } while (0)
 #endif
-#ifdef PAGEBUF_LOCK_TRACKING
+#ifdef XFS_BUF_LOCK_TRACKING
-# define PB_SET_OWNER(pb)       ((pb)->pb_last_holder = current->pid)
+# define XB_SET_OWNER(bp)       ((bp)->b_last_holder = current->pid)
-# define PB_CLEAR_OWNER(pb)     ((pb)->pb_last_holder = -1)
+# define XB_CLEAR_OWNER(bp)     ((bp)->b_last_holder = -1)
-# define PB_GET_OWNER(pb)       ((pb)->pb_last_holder)
+# define XB_GET_OWNER(bp)       ((bp)->b_last_holder)
 #else
-# define PB_SET_OWNER(pb)       do { } while (0)
+# define XB_SET_OWNER(bp)       do { } while (0)
-# define PB_CLEAR_OWNER(pb)     do { } while (0)
+# define XB_CLEAR_OWNER(bp)     do { } while (0)
-# define PB_GET_OWNER(pb)       do { } while (0)
+# define XB_GET_OWNER(bp)       do { } while (0)
 #endif
-#define pb_to_gfp(flags) \
+#define xb_to_gfp(flags) \
-        ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
+        ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-          ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+          ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-#define pb_to_km(flags) \
+#define xb_to_km(flags) \
-         (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+         (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-#define pagebuf_allocate(flags) \
+#define xfs_buf_allocate(flags) \
-        kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
+        kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define pagebuf_deallocate(pb) \
+#define xfs_buf_deallocate(bp) \
-        kmem_zone_free(pagebuf_zone, (pb));
+        kmem_zone_free(xfs_buf_zone, (bp));
 /*
- * Page Region interfaces.
+ *      Page Region interfaces.
 *
- * For pages in filesystems where the blocksize is smaller than the
+ *      For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
+ *      pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
+ *      of uptodate regions within the page.
 *
- * Each such region is "bytes per page / bits per long" bytes long.
+ *      Each such region is "bytes per page / bits per long" bytes long.
 *
- * NBPPR == number-of-bytes-per-page-region
+ *      NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
+ *      BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ *      BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
 #if (BITS_PER_LONG == 32)
 #define PRSHIFT         (PAGE_CACHE_SHIFT - 5)  /* (32 == 1<<5) */
@@ -159,7 +160,7 @@ test_page_region(
 }
 /*
- * Mapping of multi-page buffers into contiguous virtual space
+ *      Mapping of multi-page buffers into contiguous virtual space
 */
 typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int		as_list_len;
 STATIC DEFINE_SPINLOCK(as_lock);
 /*
- * Try to batch vunmaps because they are costly.
+ *      Try to batch vunmaps because they are costly.
 */
 STATIC void
 free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
 }
 /*
- *      Internal pagebuf object manipulation
+ *      Internal xfs_buf_t object manipulation
 */
 STATIC void
-_pagebuf_initialize(
+_xfs_buf_initialize(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        xfs_buftarg_t           *target,
-        loff_t                  range_base,
+        xfs_off_t               range_base,
        size_t                  range_length,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        /*
-         * We don't want certain flags to appear in pb->pb_flags.
+         * We don't want certain flags to appear in b_flags.
         */
-        flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
+        flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-        memset(pb, 0, sizeof(xfs_buf_t));
+        memset(bp, 0, sizeof(xfs_buf_t));
-        atomic_set(&pb->pb_hold, 1);
+        atomic_set(&bp->b_hold, 1);
-        init_MUTEX_LOCKED(&pb->pb_iodonesema);
+        init_MUTEX_LOCKED(&bp->b_iodonesema);
-        INIT_LIST_HEAD(&pb->pb_list);
+        INIT_LIST_HEAD(&bp->b_list);
-        INIT_LIST_HEAD(&pb->pb_hash_list);
+        INIT_LIST_HEAD(&bp->b_hash_list);
-        init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
+        init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
-        PB_SET_OWNER(pb);
+        XB_SET_OWNER(bp);
-        pb->pb_target = target;
+        bp->b_target = target;
-        pb->pb_file_offset = range_base;
+        bp->b_file_offset = range_base;
        /*
         * Set buffer_length and count_desired to the same value initially.
         * I/O routines should use count_desired, which will be the same in
         * most cases but may be reset (e.g. XFS recovery).
         */
-        pb->pb_buffer_length = pb->pb_count_desired = range_length;
+        bp->b_buffer_length = bp->b_count_desired = range_length;
-        pb->pb_flags = flags;
+        bp->b_flags = flags;
-        pb->pb_bn = XFS_BUF_DADDR_NULL;
+        bp->b_bn = XFS_BUF_DADDR_NULL;
-        atomic_set(&pb->pb_pin_count, 0);
+        atomic_set(&bp->b_pin_count, 0);
-        init_waitqueue_head(&pb->pb_waiters);
+        init_waitqueue_head(&bp->b_waiters);
-        XFS_STATS_INC(pb_create);
+        XFS_STATS_INC(xb_create);
-        PB_TRACE(pb, "initialize", target);
+        XB_TRACE(bp, "initialize", target);
 }
 /*
- * Allocate a page array capable of holding a specified number
+ *      Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
+ *      of pages, and point the page buf at it.
 */
 STATIC int
-_pagebuf_get_pages(
+_xfs_buf_get_pages(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     page_count,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        /* Make sure that we have a page list */
-        if (pb->pb_pages == NULL) {
+        if (bp->b_pages == NULL) {
-                pb->pb_offset = page_buf_poff(pb->pb_file_offset);
+                bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-                pb->pb_page_count = page_count;
+                bp->b_page_count = page_count;
-                if (page_count <= PB_PAGES) {
+                if (page_count <= XB_PAGES) {
-                        pb->pb_pages = pb->pb_page_array;
+                        bp->b_pages = bp->b_page_array;
                } else {
-                        pb->pb_pages = kmem_alloc(sizeof(struct page *) *
+                        bp->b_pages = kmem_alloc(sizeof(struct page *) *
-                                        page_count, pb_to_km(flags));
+                                        page_count, xb_to_km(flags));
-                        if (pb->pb_pages == NULL)
+                        if (bp->b_pages == NULL)
                                return -ENOMEM;
                }
-                memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+                memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
        }
        return 0;
 }
 /*
- *      Frees pb_pages if it was malloced.
+ *      Frees b_pages if it was allocated.
 */
 STATIC void
-_pagebuf_free_pages(
+_xfs_buf_free_pages(
        xfs_buf_t       *bp)
 {
-        if (bp->pb_pages != bp->pb_page_array) {
+        if (bp->b_pages != bp->b_page_array) {
-                kmem_free(bp->pb_pages,
+                kmem_free(bp->b_pages,
-                          bp->pb_page_count * sizeof(struct page *));
+                          bp->b_page_count * sizeof(struct page *));
        }
 }
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
 *      Releases the specified buffer.
 *
 *      The modification state of any associated pages is left unchanged.
- *      The buffer most not be on any hash - use pagebuf_rele instead for
+ *      The buffer must not be on any hash - use xfs_buf_rele instead for
 *      hashed and refcounted buffers
 */
 void
-pagebuf_free(
+xfs_buf_free(
        xfs_buf_t               *bp)
 {
-        PB_TRACE(bp, "free", 0);
+        XB_TRACE(bp, "free", 0);
-        ASSERT(list_empty(&bp->pb_hash_list));
+        ASSERT(list_empty(&bp->b_hash_list));
-        if (bp->pb_flags & _PBF_PAGE_CACHE) {
+        if (bp->b_flags & _XBF_PAGE_CACHE) {
                uint            i;
-                if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
+                if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
-                        free_address(bp->pb_addr - bp->pb_offset);
+                        free_address(bp->b_addr - bp->b_offset);
-                for (i = 0; i < bp->pb_page_count; i++)
+                for (i = 0; i < bp->b_page_count; i++)
-                        page_cache_release(bp->pb_pages[i]);
+                        page_cache_release(bp->b_pages[i]);
-                _pagebuf_free_pages(bp);
+                _xfs_buf_free_pages(bp);
-        } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+        } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
                 /*
-                  * XXX(hch): bp->pb_count_desired might be incorrect (see
+                  * XXX(hch): bp->b_count_desired might be incorrect (see
-                  * pagebuf_associate_memory for details), but fortunately
+                  * xfs_buf_associate_memory for details), but fortunately
                  * the Linux version of kmem_free ignores the len argument..
                  */
-                kmem_free(bp->pb_addr, bp->pb_count_desired);
+                kmem_free(bp->b_addr, bp->b_count_desired);
-                _pagebuf_free_pages(bp);
+                _xfs_buf_free_pages(bp);
        }
-        pagebuf_deallocate(bp);
+        xfs_buf_deallocate(bp);
 }
 /*
 *      Finds all pages for buffer in question and builds it's page list.
 */
 STATIC int
-_pagebuf_lookup_pages(
+_xfs_buf_lookup_pages(
        xfs_buf_t               *bp,
        uint                    flags)
 {
-        struct address_space    *mapping = bp->pb_target->pbr_mapping;
+        struct address_space    *mapping = bp->b_target->bt_mapping;
-        size_t                  blocksize = bp->pb_target->pbr_bsize;
+        size_t                  blocksize = bp->b_target->bt_bsize;
-        size_t                  size = bp->pb_count_desired;
+        size_t                  size = bp->b_count_desired;
        size_t                  nbytes, offset;
-        gfp_t                   gfp_mask = pb_to_gfp(flags);
+        gfp_t                   gfp_mask = xb_to_gfp(flags);
        unsigned short          page_count, i;
        pgoff_t                 first;
-        loff_t                  end;
+        xfs_off_t               end;
        int                     error;
-        end = bp->pb_file_offset + bp->pb_buffer_length;
+        end = bp->b_file_offset + bp->b_buffer_length;
-        page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+        page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-        error = _pagebuf_get_pages(bp, page_count, flags);
+        error = _xfs_buf_get_pages(bp, page_count, flags);
        if (unlikely(error))
                return error;
-        bp->pb_flags |= _PBF_PAGE_CACHE;
+        bp->b_flags |= _XBF_PAGE_CACHE;
-        offset = bp->pb_offset;
+        offset = bp->b_offset;
-        first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+        first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
-        for (i = 0; i < bp->pb_page_count; i++) {
+        for (i = 0; i < bp->b_page_count; i++) {
                struct page     *page;
                uint            retries = 0;
              retry:
                page = find_or_create_page(mapping, first + i, gfp_mask);
                if (unlikely(page == NULL)) {
-                        if (flags & PBF_READ_AHEAD) {
+                        if (flags & XBF_READ_AHEAD) {
-                                bp->pb_page_count = i;
+                                bp->b_page_count = i;
-                                for (i = 0; i < bp->pb_page_count; i++)
+                                for (i = 0; i < bp->b_page_count; i++)
-                                        unlock_page(bp->pb_pages[i]);
+                                        unlock_page(bp->b_pages[i]);
                                return -ENOMEM;
                        }
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
                                        "deadlock in %s (mode:0x%x)\n",
                                        __FUNCTION__, gfp_mask);
-                        XFS_STATS_INC(pb_page_retries);
+                        XFS_STATS_INC(xb_page_retries);
                        xfsbufd_wakeup(0, gfp_mask);
                        blk_congestion_wait(WRITE, HZ/50);
                        goto retry;
                }
-                XFS_STATS_INC(pb_page_found);
+                XFS_STATS_INC(xb_page_found);
                nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
                size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
                if (!PageUptodate(page)) {
                        page_count--;
                        if (blocksize >= PAGE_CACHE_SIZE) {
-                                if (flags & PBF_READ)
+                                if (flags & XBF_READ)
-                                        bp->pb_locked = 1;
+                                        bp->b_locked = 1;
                        } else if (!PagePrivate(page)) {
                                if (test_page_region(page, offset, nbytes))
                                        page_count++;
                        }
                }
-                bp->pb_pages[i] = page;
+                bp->b_pages[i] = page;
                offset = 0;
        }
-        if (!bp->pb_locked) {
+        if (!bp->b_locked) {
-                for (i = 0; i < bp->pb_page_count; i++)
+                for (i = 0; i < bp->b_page_count; i++)
-                        unlock_page(bp->pb_pages[i]);
+                        unlock_page(bp->b_pages[i]);
        }
-        if (page_count == bp->pb_page_count)
+        if (page_count == bp->b_page_count)
-                bp->pb_flags |= PBF_DONE;
+                bp->b_flags |= XBF_DONE;
-        PB_TRACE(bp, "lookup_pages", (long)page_count);
+        XB_TRACE(bp, "lookup_pages", (long)page_count);
        return error;
 }
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
 *      Map buffer into kernel address-space if nessecary.
 */
 STATIC int
-_pagebuf_map_pages(
+_xfs_buf_map_pages(
        xfs_buf_t               *bp,
        uint                    flags)
 {
        /* A single page buffer is always mappable */
-        if (bp->pb_page_count == 1) {
+        if (bp->b_page_count == 1) {
-                bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
+                bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-                bp->pb_flags |= PBF_MAPPED;
+                bp->b_flags |= XBF_MAPPED;
-        } else if (flags & PBF_MAPPED) {
+        } else if (flags & XBF_MAPPED) {
                if (as_list_len > 64)
                        purge_addresses();
-                bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
+                bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
-                                VM_MAP, PAGE_KERNEL);
+                                        VM_MAP, PAGE_KERNEL);
-                if (unlikely(bp->pb_addr == NULL))
+                if (unlikely(bp->b_addr == NULL))
                        return -ENOMEM;
-                bp->pb_addr += bp->pb_offset;
+                bp->b_addr += bp->b_offset;
-                bp->pb_flags |= PBF_MAPPED;
+                bp->b_flags |= XBF_MAPPED;
        }
        return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
 */
 /*
- *      _pagebuf_find
+ *      Looks up, and creates if absent, a lockable buffer for
- *
- *      Looks up, and creates if absent, a lockable buffer for
 *      a given range of an inode.  The buffer is returned
 *      locked.  If other overlapping buffers exist, they are
 *      released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
 *      are unlocked.  No I/O is implied by this call.
 */
 xfs_buf_t *
-_pagebuf_find(
+_xfs_buf_find(
        xfs_buftarg_t           *btp,   /* block device target          */
-        loff_t                  ioff,   /* starting offset of range     */
+        xfs_off_t               ioff,   /* starting offset of range     */
        size_t                  isize,  /* length of range              */
-        page_buf_flags_t        flags,  /* PBF_TRYLOCK                  */
+        xfs_buf_flags_t         flags,
-        xfs_buf_t               *new_pb)/* newly allocated buffer       */
+        xfs_buf_t               *new_bp)
 {
-        loff_t                  range_base;
+        xfs_off_t               range_base;
        size_t                  range_length;
        xfs_bufhash_t           *hash;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
        range_base = (ioff << BBSHIFT);
        range_length = (isize << BBSHIFT);
        /* Check for IOs smaller than the sector size / not sector aligned */
-        ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+        ASSERT(!(range_length < (1 << btp->bt_sshift)));
-        ASSERT(!(range_base & (loff_t)btp->pbr_smask));
+        ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
        hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
        spin_lock(&hash->bh_lock);
-        list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+        list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                ASSERT(btp == pb->pb_target);
+                ASSERT(btp == bp->b_target);
-                if (pb->pb_file_offset == range_base &&
+                if (bp->b_file_offset == range_base &&
-                    pb->pb_buffer_length == range_length) {
+                    bp->b_buffer_length == range_length) {
                        /*
-                         * If we look at something bring it to the
+                         * If we look at something, bring it to the
                         * front of the list for next time.
                         */
-                        atomic_inc(&pb->pb_hold);
+                        atomic_inc(&bp->b_hold);
-                        list_move(&pb->pb_hash_list, &hash->bh_list);
+                        list_move(&bp->b_hash_list, &hash->bh_list);
                        goto found;
                }
        }
        /* No match found */
-        if (new_pb) {
+        if (new_bp) {
-                _pagebuf_initialize(new_pb, btp, range_base,
+                _xfs_buf_initialize(new_bp, btp, range_base,
                                range_length, flags);
-                new_pb->pb_hash = hash;
+                new_bp->b_hash = hash;
-                list_add(&new_pb->pb_hash_list, &hash->bh_list);
+                list_add(&new_bp->b_hash_list, &hash->bh_list);
        } else {
-                XFS_STATS_INC(pb_miss_locked);
+                XFS_STATS_INC(xb_miss_locked);
        }
        spin_unlock(&hash->bh_lock);
-        return new_pb;
+        return new_bp;
 found:
        spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
         * if this does not work then we need to drop the
         * spinlock and do a hard attempt on the semaphore.
         */
-        if (down_trylock(&pb->pb_sema)) {
+        if (down_trylock(&bp->b_sema)) {
-                if (!(flags & PBF_TRYLOCK)) {
+                if (!(flags & XBF_TRYLOCK)) {
                        /* wait for buffer ownership */
-                        PB_TRACE(pb, "get_lock", 0);
+                        XB_TRACE(bp, "get_lock", 0);
-                        pagebuf_lock(pb);
+                        xfs_buf_lock(bp);
-                        XFS_STATS_INC(pb_get_locked_waited);
+                        XFS_STATS_INC(xb_get_locked_waited);
                } else {
                        /* We asked for a trylock and failed, no need
                         * to look at file offset and length here, we
-                         * know that this pagebuf at least overlaps our
+                         * know that this buffer at least overlaps our
-                         * pagebuf and is locked, therefore our buffer
+                         * buffer and is locked, therefore our buffer
-                         * either does not exist, or is this buffer
+                         * either does not exist, or is this buffer.
                         */
+                        xfs_buf_rele(bp);
-                        pagebuf_rele(pb);
+                        XFS_STATS_INC(xb_busy_locked);
-                        XFS_STATS_INC(pb_busy_locked);
+                        return NULL;
-                        return (NULL);
                }
        } else {
                /* trylock worked */
-                PB_SET_OWNER(pb);
+                XB_SET_OWNER(bp);
        }
-        if (pb->pb_flags & PBF_STALE) {
+        if (bp->b_flags & XBF_STALE) {
-                ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
+                ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-                pb->pb_flags &= PBF_MAPPED;
+                bp->b_flags &= XBF_MAPPED;
        }
-        PB_TRACE(pb, "got_lock", 0);
+        XB_TRACE(bp, "got_lock", 0);
-        XFS_STATS_INC(pb_get_locked);
+        XFS_STATS_INC(xb_get_locked);
-        return (pb);
+        return bp;
 }
 /*
- *      xfs_buf_get_flags assembles a buffer covering the specified range.
+ *      Assembles a buffer covering the specified range.
- *
 *      Storage in memory for all portions of the buffer will be allocated,
 *      although backing storage may not be.
 */
 xfs_buf_t *
-xfs_buf_get_flags(                      /* allocate a buffer            */
+xfs_buf_get_flags(
        xfs_buftarg_t           *target,/* target for buffer            */
-        loff_t                  ioff,   /* starting offset of range     */
+        xfs_off_t               ioff,   /* starting offset of range     */
        size_t                  isize,  /* length of range              */
-        page_buf_flags_t        flags)  /* PBF_TRYLOCK                  */
+        xfs_buf_flags_t         flags)
 {
-        xfs_buf_t               *pb, *new_pb;
+        xfs_buf_t               *bp, *new_bp;
        int                     error = 0, i;
-        new_pb = pagebuf_allocate(flags);
+        new_bp = xfs_buf_allocate(flags);
-        if (unlikely(!new_pb))
+        if (unlikely(!new_bp))
                return NULL;
-        pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
+        bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-        if (pb == new_pb) {
+        if (bp == new_bp) {
-                error = _pagebuf_lookup_pages(pb, flags);
+                error = _xfs_buf_lookup_pages(bp, flags);
                if (error)
                        goto no_buffer;
        } else {
-                pagebuf_deallocate(new_pb);
+                xfs_buf_deallocate(new_bp);
-                if (unlikely(pb == NULL))
+                if (unlikely(bp == NULL))
                        return NULL;
        }
-        for (i = 0; i < pb->pb_page_count; i++)
+        for (i = 0; i < bp->b_page_count; i++)
-                mark_page_accessed(pb->pb_pages[i]);
+                mark_page_accessed(bp->b_pages[i]);
-        if (!(pb->pb_flags & PBF_MAPPED)) {
+        if (!(bp->b_flags & XBF_MAPPED)) {
-                error = _pagebuf_map_pages(pb, flags);
+                error = _xfs_buf_map_pages(bp, flags);
                if (unlikely(error)) {
                        printk(KERN_WARNING "%s: failed to map pages\n",
                                        __FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags(			/* allocate a buffer		*/
                }
        }
-        XFS_STATS_INC(pb_get);
+        XFS_STATS_INC(xb_get);
        /*
         * Always fill in the block number now, the mapped cases can do
         * their own overlay of this later.
         */
-        pb->pb_bn = ioff;
+        bp->b_bn = ioff;
-        pb->pb_count_desired = pb->pb_buffer_length;
+        bp->b_count_desired = bp->b_buffer_length;
-        PB_TRACE(pb, "get", (unsigned long)flags);
+        XB_TRACE(bp, "get", (unsigned long)flags);
-        return pb;
+        return bp;
 no_buffer:
-        if (flags & (PBF_LOCK | PBF_TRYLOCK))
+        if (flags & (XBF_LOCK | XBF_TRYLOCK))
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return NULL;
 }
 xfs_buf_t *
 xfs_buf_read_flags(
        xfs_buftarg_t           *target,
-        loff_t                  ioff,
+        xfs_off_t               ioff,
        size_t                  isize,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
-        xfs_buf_t               *pb;
+        xfs_buf_t               *bp;
-        flags |= PBF_READ;
+        flags |= XBF_READ;
-        pb = xfs_buf_get_flags(target, ioff, isize, flags);
+        bp = xfs_buf_get_flags(target, ioff, isize, flags);
-        if (pb) {
+        if (bp) {
-                if (!XFS_BUF_ISDONE(pb)) {
+                if (!XFS_BUF_ISDONE(bp)) {
-                        PB_TRACE(pb, "read", (unsigned long)flags);
+                        XB_TRACE(bp, "read", (unsigned long)flags);
-                        XFS_STATS_INC(pb_get_read);
+                        XFS_STATS_INC(xb_get_read);
-                        pagebuf_iostart(pb, flags);
+                        xfs_buf_iostart(bp, flags);
-                } else if (flags & PBF_ASYNC) {
+                } else if (flags & XBF_ASYNC) {
-                        PB_TRACE(pb, "read_async", (unsigned long)flags);
+                        XB_TRACE(bp, "read_async", (unsigned long)flags);
                        /*
                         * Read ahead call which is already satisfied,
                         * drop the buffer
                         */
                        goto no_buffer;
                } else {
-                        PB_TRACE(pb, "read_done", (unsigned long)flags);
+                        XB_TRACE(bp, "read_done", (unsigned long)flags);
                        /* We do not want read in the flags */
-                        pb->pb_flags &= ~PBF_READ;
+                        bp->b_flags &= ~XBF_READ;
                }
        }
-        return pb;
+        return bp;
 no_buffer:
-        if (flags & (PBF_LOCK | PBF_TRYLOCK))
+        if (flags & (XBF_LOCK | XBF_TRYLOCK))
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return NULL;
 }
 /*
- * If we are not low on memory then do the readahead in a deadlock
+ *      If we are not low on memory then do the readahead in a deadlock
- * safe manner.
+ *      safe manner.
 */
 void
-pagebuf_readahead(
+xfs_buf_readahead(
        xfs_buftarg_t           *target,
-        loff_t                  ioff,
+        xfs_off_t               ioff,
        size_t                  isize,
-        page_buf_flags_t        flags)
+        xfs_buf_flags_t         flags)
 {
        struct backing_dev_info *bdi;
-        bdi = target->pbr_mapping->backing_dev_info;
+        bdi = target->bt_mapping->backing_dev_info;
        if (bdi_read_congested(bdi))
                return;
-        flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
+        flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
        xfs_buf_read_flags(target, ioff, isize, flags);
 }
 xfs_buf_t *
-pagebuf_get_empty(
+xfs_buf_get_empty(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
-        xfs_buf_t               *pb;
+        xfs_buf_t               *bp;
-        pb = pagebuf_allocate(0);
+        bp = xfs_buf_allocate(0);
-        if (pb)
+        if (bp)
-                _pagebuf_initialize(pb, target, 0, len, 0);
+                _xfs_buf_initialize(bp, target, 0, len, 0);
-        return pb;
+        return bp;
 }
 static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
 }
 int
-pagebuf_associate_memory(
+xfs_buf_associate_memory(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        void                    *mem,
        size_t                  len)
 {
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
                page_count++;
        /* Free any previous set of page pointers */
-        if (pb->pb_pages)
+        if (bp->b_pages)
-                _pagebuf_free_pages(pb);
+                _xfs_buf_free_pages(bp);
-        pb->pb_pages = NULL;
+        bp->b_pages = NULL;
-        pb->pb_addr = mem;
+        bp->b_addr = mem;
-        rval = _pagebuf_get_pages(pb, page_count, 0);
+        rval = _xfs_buf_get_pages(bp, page_count, 0);
        if (rval)
                return rval;
-        pb->pb_offset = offset;
+        bp->b_offset = offset;
        ptr = (size_t) mem & PAGE_CACHE_MASK;
        end = PAGE_CACHE_ALIGN((size_t) mem + len);
        end_cur = end;
        /* set up first page */
-        pb->pb_pages[0] = mem_to_page(mem);
+        bp->b_pages[0] = mem_to_page(mem);
        ptr += PAGE_CACHE_SIZE;
-        pb->pb_page_count = ++i;
+        bp->b_page_count = ++i;
        while (ptr < end) {
-                pb->pb_pages[i] = mem_to_page((void *)ptr);
+                bp->b_pages[i] = mem_to_page((void *)ptr);
-                pb->pb_page_count = ++i;
+                bp->b_page_count = ++i;
                ptr += PAGE_CACHE_SIZE;
        }
-        pb->pb_locked = 0;
+        bp->b_locked = 0;
-        pb->pb_count_desired = pb->pb_buffer_length = len;
+        bp->b_count_desired = bp->b_buffer_length = len;
-        pb->pb_flags |= PBF_MAPPED;
+        bp->b_flags |= XBF_MAPPED;
        return 0;
 }
 xfs_buf_t *
-pagebuf_get_no_daddr(
+xfs_buf_get_noaddr(
        size_t                  len,
        xfs_buftarg_t           *target)
 {
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
        void                    *data;
        int                     error;
-        bp = pagebuf_allocate(0);
+        bp = xfs_buf_allocate(0);
        if (unlikely(bp == NULL))
                goto fail;
-        _pagebuf_initialize(bp, target, 0, len, 0);
+        _xfs_buf_initialize(bp, target, 0, len, 0);
 try_again:
        data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
        /* check whether alignment matches.. */
        if ((__psunsigned_t)data !=
-            ((__psunsigned_t)data & ~target->pbr_smask)) {
+            ((__psunsigned_t)data & ~target->bt_smask)) {
                /* .. else double the size and try again */
                kmem_free(data, malloc_len);
                malloc_len <<= 1;
                goto try_again;
        }
-        error = pagebuf_associate_memory(bp, data, len);
+        error = xfs_buf_associate_memory(bp, data, len);
        if (error)
                goto fail_free_mem;
-        bp->pb_flags |= _PBF_KMEM_ALLOC;
+        bp->b_flags |= _XBF_KMEM_ALLOC;
-        pagebuf_unlock(bp);
+        xfs_buf_unlock(bp);
-        PB_TRACE(bp, "no_daddr", data);
+        XB_TRACE(bp, "no_daddr", data);
        return bp;
 fail_free_mem:
        kmem_free(data, malloc_len);
 fail_free_buf:
-        pagebuf_free(bp);
+        xfs_buf_free(bp);
 fail:
        return NULL;
 }
 /*
- *      pagebuf_hold
- *
 *      Increment reference count on buffer, to hold the buffer concurrently
 *      with another thread which may release (free) the buffer asynchronously.
- *
 *      Must hold the buffer already to call this function.
 */
 void
-pagebuf_hold(
+xfs_buf_hold(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        atomic_inc(&pb->pb_hold);
+        atomic_inc(&bp->b_hold);
-        PB_TRACE(pb, "hold", 0);
+        XB_TRACE(bp, "hold", 0);
 }
 /*
- *      pagebuf_rele
+ *      Releases a hold on the specified buffer.  If the
- *
+ *      hold count is 1, calls xfs_buf_free.
- *      pagebuf_rele releases a hold on the specified buffer.  If the
- *      the hold count is 1, pagebuf_rele calls pagebuf_free.
 */
 void
-pagebuf_rele(
+xfs_buf_rele(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        xfs_bufhash_t           *hash = pb->pb_hash;
+        xfs_bufhash_t           *hash = bp->b_hash;
-        PB_TRACE(pb, "rele", pb->pb_relse);
+        XB_TRACE(bp, "rele", bp->b_relse);
-        if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
+        if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
-                if (pb->pb_relse) {
+                if (bp->b_relse) {
-                        atomic_inc(&pb->pb_hold);
+                        atomic_inc(&bp->b_hold);
                        spin_unlock(&hash->bh_lock);
-                        (*(pb->pb_relse)) (pb);
+                        (*(bp->b_relse)) (bp);
-                } else if (pb->pb_flags & PBF_FS_MANAGED) {
+                } else if (bp->b_flags & XBF_FS_MANAGED) {
                        spin_unlock(&hash->bh_lock);
                } else {
-                        ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
+                        ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                        list_del_init(&pb->pb_hash_list);
+                        list_del_init(&bp->b_hash_list);
                        spin_unlock(&hash->bh_lock);
-                        pagebuf_free(pb);
+                        xfs_buf_free(bp);
                }
        } else {
                /*
                 * Catch reference count leaks
                 */
-                ASSERT(atomic_read(&pb->pb_hold) >= 0);
+                ASSERT(atomic_read(&bp->b_hold) >= 0);
        }
 }
@@ -863,168 +855,122 @@ pagebuf_rele(
 */
 /*
- *      pagebuf_cond_lock
+ *      Locks a buffer object, if it is not already locked.
- *
+ *      Note that this in no way locks the underlying pages, so it is only
- *      pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ *      useful for synchronizing concurrent use of buffer objects, not for
- *      Note that this in no way
+ *      synchronizing independent access to the underlying pages.
- *      locks the underlying pages, so it is only useful for synchronizing
- *      concurrent use of page buffer objects, not for synchronizing independent
- *      access to the underlying pages.
 */
 int
-pagebuf_cond_lock(                      /* lock buffer, if not locked   */
+xfs_buf_cond_lock(
-                                        /* returns -EBUSY if locked)    */
+        xfs_buf_t               *bp)
-        xfs_buf_t               *pb)
 {
        int                     locked;
-        locked = down_trylock(&pb->pb_sema) == 0;
+        locked = down_trylock(&bp->b_sema) == 0;
        if (locked) {
-                PB_SET_OWNER(pb);
+                XB_SET_OWNER(bp);
        }
-        PB_TRACE(pb, "cond_lock", (long)locked);
+        XB_TRACE(bp, "cond_lock", (long)locked);
-        return(locked ? 0 : -EBUSY);
+        return locked ? 0 : -EBUSY;
 }
 #if defined(DEBUG) || defined(XFS_BLI_TRACE)
-/*
- *      pagebuf_lock_value
- *
- *      Return lock value for a pagebuf
- */
 int
-pagebuf_lock_value(
+xfs_buf_lock_value(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        return(atomic_read(&pb->pb_sema.count));
+        return atomic_read(&bp->b_sema.count);
 }
 #endif
 /*
- *      pagebuf_lock
+ *      Locks a buffer object.
- *
+ *      Note that this in no way locks the underlying pages, so it is only
- *      pagebuf_lock locks a buffer object.  Note that this in no way
+ *      useful for synchronizing concurrent use of buffer objects, not for
- *      locks the underlying pages, so it is only useful for synchronizing
+ *      synchronizing independent access to the underlying pages.
- *      concurrent use of page buffer objects, not for synchronizing independent
- *      access to the underlying pages.
 */
-int
+void
-pagebuf_lock(
+xfs_buf_lock(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "lock", 0);
+        XB_TRACE(bp, "lock", 0);
-        if (atomic_read(&pb->pb_io_remaining))
+        if (atomic_read(&bp->b_io_remaining))
-                blk_run_address_space(pb->pb_target->pbr_mapping);
+                blk_run_address_space(bp->b_target->bt_mapping);
-        down(&pb->pb_sema);
+        down(&bp->b_sema);
-        PB_SET_OWNER(pb);
+        XB_SET_OWNER(bp);
-        PB_TRACE(pb, "locked", 0);
+        XB_TRACE(bp, "locked", 0);
-        return 0;
 }
 /*
- *      pagebuf_unlock
+ *      Releases the lock on the buffer object.
- *
- *      pagebuf_unlock releases the lock on the buffer object created by
- *      pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
- *      created by pagebuf_pin).
- *
 *      If the buffer is marked delwri but is not queued, do so before we
- *      unlock the buffer as we need to set flags correctly. We also need to
+ *      unlock the buffer as we need to set flags correctly.  We also need to
 *      take a reference for the delwri queue because the unlocker is going to
 *      drop their's and they don't know we just queued it.
 */
 void
-pagebuf_unlock(                         /* unlock buffer                */
+xfs_buf_unlock(
-        xfs_buf_t               *pb)    /* buffer to unlock             */
+        xfs_buf_t               *bp)
 {
-        if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
+        if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-                atomic_inc(&pb->pb_hold);
+                atomic_inc(&bp->b_hold);
-                pb->pb_flags |= PBF_ASYNC;
+                bp->b_flags |= XBF_ASYNC;
-                pagebuf_delwri_queue(pb, 0);
+                xfs_buf_delwri_queue(bp, 0);
        }
-        PB_CLEAR_OWNER(pb);
+        XB_CLEAR_OWNER(bp);
-        up(&pb->pb_sema);
+        up(&bp->b_sema);
-        PB_TRACE(pb, "unlock", 0);
+        XB_TRACE(bp, "unlock", 0);
 }
 /*
 *      Pinning Buffer Storage in Memory
- */
+ *      Ensure that no attempt to force a buffer to disk will succeed.
-/*
- *      pagebuf_pin
- *
- *      pagebuf_pin locks all of the memory represented by a buffer in
- *      memory.  Multiple calls to pagebuf_pin and pagebuf_unpin, for
- *      the same or different buffers affecting a given page, will
- *      properly count the number of outstanding "pin" requests.  The
- *      buffer may be released after the pagebuf_pin and a different
- *      buffer used when calling pagebuf_unpin, if desired.
- *      pagebuf_pin should be used by the file system when it wants be
- *      assured that no attempt will be made to force the affected
- *      memory to disk.  It does not assure that a given logical page
- *      will not be moved to a different physical page.
 */
 void
-pagebuf_pin(
+xfs_buf_pin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        atomic_inc(&pb->pb_pin_count);
+        atomic_inc(&bp->b_pin_count);
-        PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+        XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
 }
-/*
- *      pagebuf_unpin
- *
- *      pagebuf_unpin reverses the locking of memory performed by
- *      pagebuf_pin.  Note that both functions affected the logical
- *      pages associated with the buffer, not the buffer itself.
- */
 void
-pagebuf_unpin(
+xfs_buf_unpin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        if (atomic_dec_and_test(&pb->pb_pin_count)) {
+        if (atomic_dec_and_test(&bp->b_pin_count))
-                wake_up_all(&pb->pb_waiters);
+                wake_up_all(&bp->b_waiters);
-        }
+        XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
-        PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
 }
 int
-pagebuf_ispin(
+xfs_buf_ispin(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        return atomic_read(&pb->pb_pin_count);
+        return atomic_read(&bp->b_pin_count);
 }
-/*
+STATIC void
- *      pagebuf_wait_unpin
+xfs_buf_wait_unpin(
- *
+        xfs_buf_t               *bp)
- *      pagebuf_wait_unpin waits until all of the memory associated
- *      with the buffer is not longer locked in memory.  It returns
- *      immediately if none of the affected pages are locked.
- */
-static inline void
-_pagebuf_wait_unpin(
-        xfs_buf_t               *pb)
 {
        DECLARE_WAITQUEUE       (wait, current);
-        if (atomic_read(&pb->pb_pin_count) == 0)
+        if (atomic_read(&bp->b_pin_count) == 0)
                return;
-        add_wait_queue(&pb->pb_waiters, &wait);
+        add_wait_queue(&bp->b_waiters, &wait);
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
-                if (atomic_read(&pb->pb_pin_count) == 0)
+                if (atomic_read(&bp->b_pin_count) == 0)
                        break;
-                if (atomic_read(&pb->pb_io_remaining))
+                if (atomic_read(&bp->b_io_remaining))
-                        blk_run_address_space(pb->pb_target->pbr_mapping);
+                        blk_run_address_space(bp->b_target->bt_mapping);
                schedule();
        }
-        remove_wait_queue(&pb->pb_waiters, &wait);
+        remove_wait_queue(&bp->b_waiters, &wait);
        set_current_state(TASK_RUNNING);
 }
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
 *      Buffer Utility Routines
 */
-/*
- *      pagebuf_iodone
- *
- *      pagebuf_iodone marks a buffer for which I/O is in progress
- *      done with respect to that I/O.  The pb_iodone routine, if
- *      present, will be called as a side-effect.
- */
 STATIC void
-pagebuf_iodone_work(
+xfs_buf_iodone_work(
        void                    *v)
 {
        xfs_buf_t               *bp = (xfs_buf_t *)v;
-        if (bp->pb_iodone)
+        if (bp->b_iodone)
-                (*(bp->pb_iodone))(bp);
+                (*(bp->b_iodone))(bp);
-        else if (bp->pb_flags & PBF_ASYNC)
+        else if (bp->b_flags & XBF_ASYNC)
                xfs_buf_relse(bp);
 }
 void
-pagebuf_iodone(
+xfs_buf_ioend(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     schedule)
 {
-        pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
+        bp->b_flags &= ~(XBF_READ | XBF_WRITE);
-        if (pb->pb_error == 0)
+        if (bp->b_error == 0)
-                pb->pb_flags |= PBF_DONE;
+                bp->b_flags |= XBF_DONE;
-        PB_TRACE(pb, "iodone", pb->pb_iodone);
+        XB_TRACE(bp, "iodone", bp->b_iodone);
-        if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+        if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
                if (schedule) {
-                        INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
+                        INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
-                        queue_work(xfslogd_workqueue, &pb->pb_iodone_work);
+                        queue_work(xfslogd_workqueue, &bp->b_iodone_work);
                } else {
-                        pagebuf_iodone_work(pb);
+                        xfs_buf_iodone_work(bp);
                }
        } else {
-                up(&pb->pb_iodonesema);
+                up(&bp->b_iodonesema);
        }
 }
-/*
- *      pagebuf_ioerror
- *
- *      pagebuf_ioerror sets the error code for a buffer.
- */
 void
-pagebuf_ioerror(                        /* mark/clear buffer error flag */
+xfs_buf_ioerror(
-        xfs_buf_t               *pb,    /* buffer to mark               */
+        xfs_buf_t               *bp,
-        int                     error)  /* error to store (0 if none)   */
+        int                     error)
 {
        ASSERT(error >= 0 && error <= 0xffff);
-        pb->pb_error = (unsigned short)error;
+        bp->b_error = (unsigned short)error;
-        PB_TRACE(pb, "ioerror", (unsigned long)error);
+        XB_TRACE(bp, "ioerror", (unsigned long)error);
 }
 /*
- *      pagebuf_iostart
+ *      Initiate I/O on a buffer, based on the flags supplied.
- *
+ *      The b_iodone routine in the buffer supplied will only be called
- *      pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
- *      If necessary, it will arrange for any disk space allocation required,
- *      and it will break up the request if the block mappings require it.
- *      The pb_iodone routine in the buffer supplied will only be called
 *      when all of the subsidiary I/O requests, if any, have been completed.
- *      pagebuf_iostart calls the pagebuf_ioinitiate routine or
- *      pagebuf_iorequest, if the former routine is not defined, to start
- *      the I/O on a given low-level request.
 */
 int
-pagebuf_iostart(                        /* start I/O on a buffer          */
+xfs_buf_iostart(
-        xfs_buf_t               *pb,    /* buffer to start                */
+        xfs_buf_t               *bp,
-        page_buf_flags_t        flags)  /* PBF_LOCK, PBF_ASYNC, PBF_READ, */
+        xfs_buf_flags_t         flags)
-                                        /* PBF_WRITE, PBF_DELWRI,         */
-                                        /* PBF_DONT_BLOCK                 */
 {
        int                     status = 0;
-        PB_TRACE(pb, "iostart", (unsigned long)flags);
+        XB_TRACE(bp, "iostart", (unsigned long)flags);
-        if (flags & PBF_DELWRI) {
+        if (flags & XBF_DELWRI) {
-                pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
+                bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
-                pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
+                bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
-                pagebuf_delwri_queue(pb, 1);
+                xfs_buf_delwri_queue(bp, 1);
                return status;
        }
-        pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
+        bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
-                        PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-        pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
+        bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
-                        PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+                        XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-        BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);
+        BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
        /* For writes allow an alternate strategy routine to precede
         * the actual I/O request (which may not be issued at all in
         * a shutdown situation, for example).
         */
-        status = (flags & PBF_WRITE) ?
+        status = (flags & XBF_WRITE) ?
-                pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
+                xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
        /* Wait for I/O if we are not an async request.
         * Note: async I/O request completion will release the buffer,
         * and that can already be done by this point.  So using the
         * buffer pointer from here on, after async I/O, is invalid.
         */
-        if (!status && !(flags & PBF_ASYNC))
+        if (!status && !(flags & XBF_ASYNC))
-                status = pagebuf_iowait(pb);
+                status = xfs_buf_iowait(bp);
        return status;
 }
-/*
- * Helper routine for pagebuf_iorequest
- */
 STATIC __inline__ int
-_pagebuf_iolocked(
+_xfs_buf_iolocked(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
+        ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
-        if (pb->pb_flags & PBF_READ)
+        if (bp->b_flags & XBF_READ)
-                return pb->pb_locked;
+                return bp->b_locked;
        return 0;
 }
 STATIC __inline__ void
-_pagebuf_iodone(
+_xfs_buf_ioend(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        int                     schedule)
 {
-        if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-                pb->pb_locked = 0;
+                bp->b_locked = 0;
-                pagebuf_iodone(pb, schedule);
+                xfs_buf_ioend(bp, schedule);
        }
 }
 STATIC int
-bio_end_io_pagebuf(
+xfs_buf_bio_end_io(
        struct bio              *bio,
        unsigned int            bytes_done,
        int                     error)
 {
-        xfs_buf_t               *pb = (xfs_buf_t *)bio->bi_private;
+        xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-        unsigned int            blocksize = pb->pb_target->pbr_bsize;
+        unsigned int            blocksize = bp->b_target->bt_bsize;
        struct bio_vec          *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        if (bio->bi_size)
                return 1;
        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                pb->pb_error = EIO;
+                bp->b_error = EIO;
        do {
                struct page     *page = bvec->bv_page;
-                if (unlikely(pb->pb_error)) {
+                if (unlikely(bp->b_error)) {
-                        if (pb->pb_flags & PBF_READ)
+                        if (bp->b_flags & XBF_READ)
                                ClearPageUptodate(page);
                        SetPageError(page);
-                } else if (blocksize == PAGE_CACHE_SIZE) {
+                } else if (blocksize >= PAGE_CACHE_SIZE) {
                        SetPageUptodate(page);
                } else if (!PagePrivate(page) &&
-                                (pb->pb_flags & _PBF_PAGE_CACHE)) {
+                                (bp->b_flags & _XBF_PAGE_CACHE)) {
                        set_page_region(page, bvec->bv_offset, bvec->bv_len);
                }
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
-                if (_pagebuf_iolocked(pb)) {
+                if (_xfs_buf_iolocked(bp)) {
                        unlock_page(page);
                }
        } while (bvec >= bio->bi_io_vec);
-        _pagebuf_iodone(pb, 1);
+        _xfs_buf_ioend(bp, 1);
        bio_put(bio);
        return 0;
 }
 STATIC void
-_pagebuf_ioapply(
+_xfs_buf_ioapply(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
        int                     i, rw, map_i, total_nr_pages, nr_pages;
        struct bio              *bio;
-        int                     offset = pb->pb_offset;
+        int                     offset = bp->b_offset;
-        int                     size = pb->pb_count_desired;
+        int                     size = bp->b_count_desired;
-        sector_t                sector = pb->pb_bn;
+        sector_t                sector = bp->b_bn;
-        unsigned int            blocksize = pb->pb_target->pbr_bsize;
+        unsigned int            blocksize = bp->b_target->bt_bsize;
-        int                     locking = _pagebuf_iolocked(pb);
+        int                     locking = _xfs_buf_iolocked(bp);
-        total_nr_pages = pb->pb_page_count;
+        total_nr_pages = bp->b_page_count;
        map_i = 0;
-        if (pb->pb_flags & _PBF_RUN_QUEUES) {
+        if (bp->b_flags & _XBF_RUN_QUEUES) {
-                pb->pb_flags &= ~_PBF_RUN_QUEUES;
+                bp->b_flags &= ~_XBF_RUN_QUEUES;
-                rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
+                rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
        } else {
-                rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
+                rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
        }
-        if (pb->pb_flags & PBF_ORDERED) {
+        if (bp->b_flags & XBF_ORDERED) {
-                ASSERT(!(pb->pb_flags & PBF_READ));
+                ASSERT(!(bp->b_flags & XBF_READ));
                rw = WRITE_BARRIER;
        }
-        /* Special code path for reading a sub page size pagebuf in --
+        /* Special code path for reading a sub page size buffer in --
         * we populate up the whole page, and hence the other metadata
         * in the same page.  This optimization is only valid when the
-         * filesystem block size and the page size are equal.
+         * filesystem block size is not smaller than the page size.
         */
-        if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
+        if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-            (pb->pb_flags & PBF_READ) && locking &&
+            (bp->b_flags & XBF_READ) && locking &&
-            (blocksize == PAGE_CACHE_SIZE)) {
+            (blocksize >= PAGE_CACHE_SIZE)) {
                bio = bio_alloc(GFP_NOIO, 1);
-                bio->bi_bdev = pb->pb_target->pbr_bdev;
+                bio->bi_bdev = bp->b_target->bt_bdev;
                bio->bi_sector = sector - (offset >> BBSHIFT);
-                bio->bi_end_io = bio_end_io_pagebuf;
+                bio->bi_end_io = xfs_buf_bio_end_io;
-                bio->bi_private = pb;
+                bio->bi_private = bp;
-                bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
+                bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
                size = 0;
-                atomic_inc(&pb->pb_io_remaining);
+                atomic_inc(&bp->b_io_remaining);
                goto submit_io;
        }
        /* Lock down the pages which we need to for the request */
-        if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
+        if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
                for (i = 0; size; i++) {
                        int             nbytes = PAGE_CACHE_SIZE - offset;
-                        struct page     *page = pb->pb_pages[i];
+                        struct page     *page = bp->b_pages[i];
                        if (nbytes > size)
                                nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
                        size -= nbytes;
                        offset = 0;
                }
-                offset = pb->pb_offset;
+                offset = bp->b_offset;
-                size = pb->pb_count_desired;
+                size = bp->b_count_desired;
        }
 next_chunk:
-        atomic_inc(&pb->pb_io_remaining);
+        atomic_inc(&bp->b_io_remaining);
        nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
        if (nr_pages > total_nr_pages)
                nr_pages = total_nr_pages;
        bio = bio_alloc(GFP_NOIO, nr_pages);
-        bio->bi_bdev = pb->pb_target->pbr_bdev;
+        bio->bi_bdev = bp->b_target->bt_bdev;
        bio->bi_sector = sector;
-        bio->bi_end_io = bio_end_io_pagebuf;
+        bio->bi_end_io = xfs_buf_bio_end_io;
-        bio->bi_private = pb;
+        bio->bi_private = bp;
        for (; size && nr_pages; nr_pages--, map_i++) {
-                int     nbytes = PAGE_CACHE_SIZE - offset;
+                int     rbytes, nbytes = PAGE_CACHE_SIZE - offset;
                if (nbytes > size)
                        nbytes = size;
-                if (bio_add_page(bio, pb->pb_pages[map_i],
+                rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-                                        nbytes, offset) < nbytes)
+                if (rbytes < nbytes)
                        break;
                offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
                        goto next_chunk;
        } else {
                bio_put(bio);
-                pagebuf_ioerror(pb, EIO);
+                xfs_buf_ioerror(bp, EIO);
        }
 }
-/*
- *      pagebuf_iorequest -- the core I/O request routine.
- */
 int
-pagebuf_iorequest(                      /* start real I/O               */
+xfs_buf_iorequest(
-        xfs_buf_t               *pb)    /* buffer to convey to device   */
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "iorequest", 0);
+        XB_TRACE(bp, "iorequest", 0);
-        if (pb->pb_flags & PBF_DELWRI) {
+        if (bp->b_flags & XBF_DELWRI) {
-                pagebuf_delwri_queue(pb, 1);
+                xfs_buf_delwri_queue(bp, 1);
                return 0;
        }
-        if (pb->pb_flags & PBF_WRITE) {
+        if (bp->b_flags & XBF_WRITE) {
-                _pagebuf_wait_unpin(pb);
+                xfs_buf_wait_unpin(bp);
        }
-        pagebuf_hold(pb);
+        xfs_buf_hold(bp);
        /* Set the count to 1 initially, this will stop an I/O
         * completion callout which happens before we have started
-         * all the I/O from calling pagebuf_iodone too early.
+         * all the I/O from calling xfs_buf_ioend too early.
         */
-        atomic_set(&pb->pb_io_remaining, 1);
+        atomic_set(&bp->b_io_remaining, 1);
-        _pagebuf_ioapply(pb);
+        _xfs_buf_ioapply(bp);
-        _pagebuf_iodone(pb, 0);
+        _xfs_buf_ioend(bp, 0);
-        pagebuf_rele(pb);
+        xfs_buf_rele(bp);
        return 0;
 }
 /*
- *      pagebuf_iowait
+ *      Waits for I/O to complete on the buffer supplied.
- *
+ *      It returns immediately if no I/O is pending.
- *      pagebuf_iowait waits for I/O to complete on the buffer supplied.
+ *      It returns the I/O error code, if any, or 0 if there was no error.
- *      It returns immediately if no I/O is pending.  In any case, it returns
- *      the error code, if any, or 0 if there is no error.
 */
 int
-pagebuf_iowait(
+xfs_buf_iowait(
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)
 {
-        PB_TRACE(pb, "iowait", 0);
+        XB_TRACE(bp, "iowait", 0);
-        if (atomic_read(&pb->pb_io_remaining))
+        if (atomic_read(&bp->b_io_remaining))
-                blk_run_address_space(pb->pb_target->pbr_mapping);
+                blk_run_address_space(bp->b_target->bt_mapping);
-        down(&pb->pb_iodonesema);
+        down(&bp->b_iodonesema);
-        PB_TRACE(pb, "iowaited", (long)pb->pb_error);
+        XB_TRACE(bp, "iowaited", (long)bp->b_error);
-        return pb->pb_error;
+        return bp->b_error;
 }
-caddr_t
+xfs_caddr_t
-pagebuf_offset(
+xfs_buf_offset(
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,
        size_t                  offset)
 {
        struct page             *page;
-        offset += pb->pb_offset;
+        if (bp->b_flags & XBF_MAPPED)
+                return XFS_BUF_PTR(bp) + offset;
-        page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
+        offset += bp->b_offset;
-        return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
+        page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
+        return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
 }
 /*
- *      pagebuf_iomove
- *
 *      Move data into or out of a buffer.
 */
 void
-pagebuf_iomove(
+xfs_buf_iomove(
-        xfs_buf_t               *pb,    /* buffer to process            */
+        xfs_buf_t               *bp,    /* buffer to process            */
        size_t                  boff,   /* starting buffer offset       */
        size_t                  bsize,  /* length to copy               */
        caddr_t                 data,   /* data address                 */
-        page_buf_rw_t           mode)   /* read/write flag              */
+        xfs_buf_rw_t            mode)   /* read/write/zero flag         */
 {
        size_t                  bend, cpoff, csize;
        struct page             *page;
        bend = boff + bsize;
        while (boff < bend) {
-                page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
+                page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-                cpoff = page_buf_poff(boff + pb->pb_offset);
+                cpoff = xfs_buf_poff(boff + bp->b_offset);
                csize = min_t(size_t,
-                              PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
+                              PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
                ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
                switch (mode) {
-                case PBRW_ZERO:
+                case XBRW_ZERO:
                        memset(page_address(page) + cpoff, 0, csize);
                        break;
-                case PBRW_READ:
+                case XBRW_READ:
                        memcpy(data, page_address(page) + cpoff, csize);
                        break;
-                case PBRW_WRITE:
+                case XBRW_WRITE:
                        memcpy(page_address(page) + cpoff, data, csize);
                }
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
 }
 /*
- *      Handling of buftargs.
+ *      Handling of buffer targets (buftargs).
 */
 /*
- * Wait for any bufs with callbacks that have been submitted but
+ *      Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ *      have not yet returned... walk the hash list for the target.
 */
 void
 xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
                hash = &btp->bt_hash[i];
 again:
                spin_lock(&hash->bh_lock);
-                list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
+                list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                        ASSERT(btp == bp->pb_target);
+                        ASSERT(btp == bp->b_target);
-                        if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+                        if (!(bp->b_flags & XBF_FS_MANAGED)) {
                                spin_unlock(&hash->bh_lock);
                                /*
                                 * Catch superblock reference count leaks
                                 * immediately
                                 */
-                                BUG_ON(bp->pb_bn == 0);
+                                BUG_ON(bp->b_bn == 0);
                                delay(100);
                                goto again;
                        }
@@ -1462,9 +1378,9 @@ again:
 }
 /*
- * Allocate buffer hash table for a given target.
+ *      Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
+ *      For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
+ *      we need to allocate a much larger hash table.
 */
 STATIC void
 xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
 xfs_free_bufhash(
        xfs_buftarg_t           *btp)
 {
-        kmem_free(btp->bt_hash,
+        kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
-                        (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
        btp->bt_hash = NULL;
 }
+/*
+ *      buftarg list for delwrite queue processing
+ */
+STATIC LIST_HEAD(xfs_buftarg_list);
+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+STATIC void
+xfs_register_buftarg(                   /* add btp to xfs_buftarg_list  */
+        xfs_buftarg_t           *btp)   /* buffer target to register    */
+{
+        spin_lock(&xfs_buftarg_lock);   /* serialises all xfs_buftarg_list updates */
+        list_add(&btp->bt_list, &xfs_buftarg_list);     /* list is walked by xfsbufd_wakeup() */
+        spin_unlock(&xfs_buftarg_lock);
+}
+STATIC void
+xfs_unregister_buftarg(                 /* remove btp from xfs_buftarg_list */
+        xfs_buftarg_t           *btp)   /* buffer target to unregister  */
+{
+        spin_lock(&xfs_buftarg_lock);   /* same lock xfsbufd_wakeup() holds while walking the list */
+        list_del(&btp->bt_list);
+        spin_unlock(&xfs_buftarg_lock);
+}
 void
 xfs_free_buftarg(
        xfs_buftarg_t           *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
 {
        xfs_flush_buftarg(btp, 1);
        if (external)
-                xfs_blkdev_put(btp->pbr_bdev);
+                xfs_blkdev_put(btp->bt_bdev);
        xfs_free_bufhash(btp);
-        iput(btp->pbr_mapping->host);
+        iput(btp->bt_mapping->host);
+        /* Unregister the buftarg first so that we don't get a
+         * wakeup finding a non-existent task
+         */
+        xfs_unregister_buftarg(btp);
+        kthread_stop(btp->bt_task);
        kmem_free(btp, sizeof(*btp));
 }
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
        unsigned int            sectorsize,
        int                     verbose)
 {
-        btp->pbr_bsize = blocksize;
+        btp->bt_bsize = blocksize;
-        btp->pbr_sshift = ffs(sectorsize) - 1;
+        btp->bt_sshift = ffs(sectorsize) - 1;
-        btp->pbr_smask = sectorsize - 1;
+        btp->bt_smask = sectorsize - 1;
-        if (set_blocksize(btp->pbr_bdev, sectorsize)) {
+        if (set_blocksize(btp->bt_bdev, sectorsize)) {
                printk(KERN_WARNING
                        "XFS: Cannot set_blocksize to %u on device %s\n",
                        sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
 }
 /*
-* When allocating the initial buffer target we have not yet
+ *      When allocating the initial buffer target we have not yet
-* read in the superblock, so don't know what sized sectors
+ *      read in the superblock, so don't know what sized sectors
-* are being used is at this early stage.  Play safe.
+ *      are being used at this early stage.  Play safe.
-*/
+ */
 STATIC int
 xfs_setsize_buftarg_early(
        xfs_buftarg_t           *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
        mapping->a_ops = &mapping_aops;
        mapping->backing_dev_info = bdi;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
-        btp->pbr_mapping = mapping;
+        btp->bt_mapping = mapping;
        return 0;
 }
+STATIC int
+xfs_alloc_delwrite_queue(               /* init btp's delwri state and start its xfsbufd thread */
+        xfs_buftarg_t           *btp)   /* buffer target to set up      */
+{
+        int     error = 0;
+        INIT_LIST_HEAD(&btp->bt_list);
+        INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+        spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+        btp->bt_flags = 0;              /* no XBT_* bits set yet */
+        btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");    /* btp is passed as the thread's data argument */
+        if (IS_ERR(btp->bt_task)) {
+                error = PTR_ERR(btp->bt_task);  /* thread not created; btp is left unregistered */
+                goto out_error;
+        }
+        xfs_register_buftarg(btp);      /* only registered once bt_task is valid */
+out_error:
+        return error;                   /* 0, or the error encoded in kthread_run()'s return */
+}
 xfs_buftarg_t *
 xfs_alloc_buftarg(
        struct block_device     *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-        btp->pbr_dev =  bdev->bd_dev;
+        btp->bt_dev =  bdev->bd_dev;
-        btp->pbr_bdev = bdev;
+        btp->bt_bdev = bdev;
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
        if (xfs_mapping_buftarg(btp, bdev))
                goto error;
+        if (xfs_alloc_delwrite_queue(btp))
+                goto error;
        xfs_alloc_bufhash(btp, external);
        return btp;
@@ -1616,83 +1584,81 @@ error:
 /*
- * Pagebuf delayed write buffer handling
+ *      Delayed write buffer handling
 */
-STATIC LIST_HEAD(pbd_delwrite_queue);
-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
 STATIC void
-pagebuf_delwri_queue(
+xfs_buf_delwri_queue(                   /* put bp at the tail of its target's delwri queue */
-        xfs_buf_t               *pb,
+        xfs_buf_t               *bp,    /* buffer to queue (asserted DELWRI and ASYNC) */
         int                     unlock)
 {
-        PB_TRACE(pb, "delwri_q", (long)unlock);
+        struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;        /* per-target queue */
-        ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
+        spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;        /* lock taken around queue updates */
-                                        (PBF_DELWRI|PBF_ASYNC));
+        XB_TRACE(bp, "delwri_q", (long)unlock);
+        ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
         /* If already in the queue, dequeue and place at tail */
-        if (!list_empty(&pb->pb_list)) {
+        if (!list_empty(&bp->b_list)) {
-                ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
+                ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-                if (unlock) {
+                if (unlock)
-                        atomic_dec(&pb->pb_hold);
+                        atomic_dec(&bp->b_hold);        /* NOTE(review): presumably the queue already owns a hold, so the caller's is dropped -- confirm */
-                }
+                list_del(&bp->b_list);
-                list_del(&pb->pb_list);
         }
-        pb->pb_flags |= _PBF_DELWRI_Q;
+        bp->b_flags |= _XBF_DELWRI_Q;
-        list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
+        list_add_tail(&bp->b_list, dwq);
-        pb->pb_queuetime = jiffies;
+        bp->b_queuetime = jiffies;      /* xfsbufd compares this against the age limit */
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
         if (unlock)
-                pagebuf_unlock(pb);
+                xfs_buf_unlock(bp);     /* done after the queue lock has been released */
 }
 void
-pagebuf_delwri_dequeue(
+xfs_buf_delwri_dequeue(                 /* take bp off its target's delwri queue, if queued */
-        xfs_buf_t               *pb)
+        xfs_buf_t               *bp)    /* buffer to dequeue            */
 {
+        spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;        /* per-target queue lock */
         int                     dequeued = 0;
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
-        if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+        if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-                ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
+                ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-                list_del_init(&pb->pb_list);
+                list_del_init(&bp->b_list);
                 dequeued = 1;
         }
-        pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+        bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);     /* cleared whether or not bp was on the queue */
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
         if (dequeued)
-                pagebuf_rele(pb);
+                xfs_buf_rele(bp);       /* release issued only when bp was actually unlinked */
-        PB_TRACE(pb, "delwri_dq", (long)dequeued);
+        XB_TRACE(bp, "delwri_dq", (long)dequeued);      /* NOTE(review): bp is touched after xfs_buf_rele(); assumes the caller still holds a reference -- confirm */
 }
 STATIC void
-pagebuf_runall_queues(
+xfs_buf_runall_queues(                  /* thin wrapper: calls flush_workqueue() on the given queue */
         struct workqueue_struct *queue)
 {
         flush_workqueue(queue);
 }
-/* Defines for pagebuf daemon */
-STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_force_flush;
-STATIC int xfsbufd_force_sleep;
 STATIC int
 xfsbufd_wakeup(
         int                     priority,
         gfp_t                   mask)
 {
-        if (xfsbufd_force_sleep)
+        xfs_buftarg_t           *btp;   /* cursor over every registered buffer target */
-                return 0;
-        xfsbufd_force_flush = 1;
+        spin_lock(&xfs_buftarg_lock);   /* held for the whole list walk */
-        barrier();
+        list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
-        wake_up_process(xfsbufd_task);
+                if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))  /* bit is set by xfsbufd while freezing */
+                        continue;       /* leave this target's thread alone */
+                set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);       /* makes xfsbufd ignore the buffer age check */
+                wake_up_process(btp->bt_task);
+        }
+        spin_unlock(&xfs_buftarg_lock);
         return 0;
 }
@@ -1702,67 +1668,70 @@ xfsbufd(
 {
        struct list_head        tmp;
        unsigned long           age;
-        xfs_buftarg_t           *target;
+        xfs_buftarg_t           *target = (xfs_buftarg_t *)data;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
+        struct list_head        *dwq = &target->bt_delwrite_queue;
+        spinlock_t              *dwlk = &target->bt_delwrite_lock;
        current->flags |= PF_MEMALLOC;
        INIT_LIST_HEAD(&tmp);
        do {
                if (unlikely(freezing(current))) {
-                        xfsbufd_force_sleep = 1;
+                        set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
                        refrigerator();
                } else {
-                        xfsbufd_force_sleep = 0;
+                        clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
                }
                schedule_timeout_interruptible(
                        xfs_buf_timer_centisecs * msecs_to_jiffies(10));
                age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-                spin_lock(&pbd_delwrite_lock);
+                spin_lock(dwlk);
-                list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+                list_for_each_entry_safe(bp, n, dwq, b_list) {
-                        PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
+                        XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
-                        ASSERT(pb->pb_flags & PBF_DELWRI);
+                        ASSERT(bp->b_flags & XBF_DELWRI);
-                        if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
+                        if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
-                                if (!xfsbufd_force_flush &&
+                                if (!test_bit(XBT_FORCE_FLUSH,
+                                                &target->bt_flags) &&
                                    time_before(jiffies,
-                                                pb->pb_queuetime + age)) {
+                                                bp->b_queuetime + age)) {
-                                        pagebuf_unlock(pb);
+                                        xfs_buf_unlock(bp);
                                        break;
                                }
-                                pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+                                bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-                                pb->pb_flags |= PBF_WRITE;
+                                bp->b_flags |= XBF_WRITE;
-                                list_move(&pb->pb_list, &tmp);
+                                list_move(&bp->b_list, &tmp);
                        }
                }
-                spin_unlock(&pbd_delwrite_lock);
+                spin_unlock(dwlk);
                while (!list_empty(&tmp)) {
-                        pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+                        bp = list_entry(tmp.next, xfs_buf_t, b_list);
-                        target = pb->pb_target;
+                        ASSERT(target == bp->b_target);
-                        list_del_init(&pb->pb_list);
+                        list_del_init(&bp->b_list);
-                        pagebuf_iostrategy(pb);
+                        xfs_buf_iostrategy(bp);
-                        blk_run_address_space(target->pbr_mapping);
+                        blk_run_address_space(target->bt_mapping);
                }
                if (as_list_len > 0)
                        purge_addresses();
-                xfsbufd_force_flush = 0;
+                clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
        } while (!kthread_should_stop());
        return 0;
 }
 /*
- * Go through all incore buffers, and release buffers if they belong to
+ *      Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
+ *      the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
+ *      preserve the consistency of its metadata.
 */
 int
 xfs_flush_buftarg(
@@ -1770,73 +1739,72 @@ xfs_flush_buftarg(
        int                     wait)
 {
        struct list_head        tmp;
-        xfs_buf_t               *pb, *n;
+        xfs_buf_t               *bp, *n;
        int                     pincount = 0;
+        struct list_head        *dwq = &target->bt_delwrite_queue;
+        spinlock_t              *dwlk = &target->bt_delwrite_lock;
-        pagebuf_runall_queues(xfsdatad_workqueue);
+        xfs_buf_runall_queues(xfsdatad_workqueue);
-        pagebuf_runall_queues(xfslogd_workqueue);
+        xfs_buf_runall_queues(xfslogd_workqueue);
        INIT_LIST_HEAD(&tmp);
-        spin_lock(&pbd_delwrite_lock);
+        spin_lock(dwlk);
-        list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+        list_for_each_entry_safe(bp, n, dwq, b_list) {
+                ASSERT(bp->b_target == target);
-                if (pb->pb_target != target)
+                ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
-                        continue;
+                XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
+                if (xfs_buf_ispin(bp)) {
-                ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
-                PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
-                if (pagebuf_ispin(pb)) {
                        pincount++;
                        continue;
                }
-                list_move(&pb->pb_list, &tmp);
+                list_move(&bp->b_list, &tmp);
        }
-        spin_unlock(&pbd_delwrite_lock);
+        spin_unlock(dwlk);
        /*
         * Dropped the delayed write list lock, now walk the temporary list
         */
-        list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+        list_for_each_entry_safe(bp, n, &tmp, b_list) {
-                pagebuf_lock(pb);
+                xfs_buf_lock(bp);
-                pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+                bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-                pb->pb_flags |= PBF_WRITE;
+                bp->b_flags |= XBF_WRITE;
                if (wait)
-                        pb->pb_flags &= ~PBF_ASYNC;
+                        bp->b_flags &= ~XBF_ASYNC;
                else
-                        list_del_init(&pb->pb_list);
+                        list_del_init(&bp->b_list);
-                pagebuf_iostrategy(pb);
+                xfs_buf_iostrategy(bp);
        }
        /*
         * Remaining list items must be flushed before returning
         */
        while (!list_empty(&tmp)) {
-                pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+                bp = list_entry(tmp.next, xfs_buf_t, b_list);
-                list_del_init(&pb->pb_list);
+                list_del_init(&bp->b_list);
-                xfs_iowait(pb);
+                xfs_iowait(bp);
-                xfs_buf_relse(pb);
+                xfs_buf_relse(bp);
        }
        if (wait)
-                blk_run_address_space(target->pbr_mapping);
+                blk_run_address_space(target->bt_mapping);
        return pincount;
 }
 int __init
-pagebuf_init(void)
+xfs_buf_init(void)
 {
        int             error = -ENOMEM;
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+        xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
 #endif
-        pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+        xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
-        if (!pagebuf_zone)
+        if (!xfs_buf_zone)
                goto out_free_trace_buf;
        xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
        if (!xfsdatad_workqueue)
                goto out_destroy_xfslogd_workqueue;
-        xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
+        xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
-        if (IS_ERR(xfsbufd_task)) {
+        if (!xfs_buf_shake)
-                error = PTR_ERR(xfsbufd_task);
                goto out_destroy_xfsdatad_workqueue;
-        }
-        pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
-        if (!pagebuf_shake)
-                goto out_stop_xfsbufd;
        return 0;
- out_stop_xfsbufd:
-        kthread_stop(xfsbufd_task);
 out_destroy_xfsdatad_workqueue:
        destroy_workqueue(xfsdatad_workqueue);
 out_destroy_xfslogd_workqueue:
        destroy_workqueue(xfslogd_workqueue);
 out_free_buf_zone:
-        kmem_zone_destroy(pagebuf_zone);
+        kmem_zone_destroy(xfs_buf_zone);
 out_free_trace_buf:
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        ktrace_free(pagebuf_trace_buf);
+        ktrace_free(xfs_buf_trace_buf);
 #endif
        return error;
 }
 void
-pagebuf_terminate(void)
+xfs_buf_terminate(void)                 /* tear down what xfs_buf_init() set up */
 {
-        kmem_shake_deregister(pagebuf_shake);
+        kmem_shake_deregister(xfs_buf_shake);   /* drops the xfsbufd_wakeup registration */
-        kthread_stop(xfsbufd_task);
         destroy_workqueue(xfsdatad_workqueue);
         destroy_workqueue(xfslogd_workqueue);
-        kmem_zone_destroy(pagebuf_zone);
+        kmem_zone_destroy(xfs_buf_zone);
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
-        ktrace_free(pagebuf_trace_buf);
+        ktrace_free(xfs_buf_trace_buf);
 #endif
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d1..4dd6592d5a4c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -32,44 +32,47 @@
 *      Base types
 */
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
+#define XFS_BUF_DADDR_NULL      ((xfs_daddr_t) (-1LL))
-#define page_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_ctob(pp)        ((pp) * PAGE_CACHE_SIZE)
-#define page_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoc(dd)        (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define page_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)       ((dd) >> PAGE_CACHE_SHIFT)
-#define page_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
+#define xfs_buf_poff(aa)        ((aa) & ~PAGE_CACHE_MASK)
-typedef enum page_buf_rw_e {
+typedef enum {                          /* mode argument of xfs_buf_iomove() */
-        PBRW_READ = 1,                  /* transfer into target memory */
+        XBRW_READ = 1,                  /* transfer into target memory */
-        PBRW_WRITE = 2,                 /* transfer from target memory */
+        XBRW_WRITE = 2,                 /* transfer from target memory */
-        PBRW_ZERO = 3                   /* Zero target memory */
+        XBRW_ZERO = 3,                  /* Zero target memory */
-} page_buf_rw_t;
+} xfs_buf_rw_t;
+typedef enum {                          /* b_flags values */
-typedef enum page_buf_flags_e {         /* pb_flags values */
+        XBF_READ = (1 << 0),    /* buffer intended for reading from device */
-        PBF_READ = (1 << 0),    /* buffer intended for reading from device */
+        XBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
-        PBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
+        XBF_MAPPED = (1 << 2),  /* buffer mapped (b_addr valid)            */
-        PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
+        XBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
-        PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
+        XBF_DONE = (1 << 5),    /* all pages in the buffer uptodate        */
-        PBF_DONE = (1 << 5),    /* all pages in the buffer uptodate        */
+        XBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
-        PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
+        XBF_STALE = (1 << 7),   /* buffer has been staled, do not find it  */
-        PBF_STALE = (1 << 7),   /* buffer has been staled, do not find it  */
+        XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
-        PBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
+        XBF_ORDERED = (1 << 11),    /* use ordered writes                  */
-        PBF_ORDERED = (1 << 11),    /* use ordered writes                  */
+        XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
-        PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
         /* flags used only as arguments to access routines */
-        PBF_LOCK = (1 << 14),       /* lock requested                      */
+        XBF_LOCK = (1 << 14),       /* lock requested                      */
-        PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
+        XBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
-        PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
+        XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
         /* flags used only internally */
-        _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
+        _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
-        _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
+        _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
-        _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
+        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
-        _PBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
+        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
-} page_buf_flags_t;
+} xfs_buf_flags_t;
+typedef enum {                          /* bt_flags bit NUMBERS for set_bit()/test_bit()/clear_bit(), not masks */
+        XBT_FORCE_SLEEP = (0 << 1),     /* evaluates to 0, i.e. bit 0; set by xfsbufd while freezing */
+        XBT_FORCE_FLUSH = (1 << 1),     /* evaluates to 2, i.e. bit 2; set by xfsbufd_wakeup() */
+} xfs_buftarg_flags_t;
 typedef struct xfs_bufhash {
        struct list_head        bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
 } xfs_bufhash_t;
 typedef struct xfs_buftarg {
-        dev_t                   pbr_dev;
+        dev_t                   bt_dev;
-        struct block_device     *pbr_bdev;
+        struct block_device     *bt_bdev;
-        struct address_space    *pbr_mapping;
+        struct address_space    *bt_mapping;
-        unsigned int            pbr_bsize;
+        unsigned int            bt_bsize;
-        unsigned int            pbr_sshift;
+        unsigned int            bt_sshift;
-        size_t                  pbr_smask;
+        size_t                  bt_smask;
-        /* per-device buffer hash table */
+        /* per device buffer hash table */
        uint                    bt_hashmask;
        uint                    bt_hashshift;
        xfs_bufhash_t           *bt_hash;
+        /* per device delwri queue */
+        struct task_struct      *bt_task;
+        struct list_head        bt_list;
+        struct list_head        bt_delwrite_queue;
+        spinlock_t              bt_delwrite_lock;
+        unsigned long           bt_flags;
 } xfs_buftarg_t;
 /*
- *      xfs_buf_t:  Buffer structure for page cache-based buffers
+ *      xfs_buf_t:  Buffer structure for pagecache-based buffers
+ *
+ * This buffer structure is used by the pagecache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer.
 *
- * This buffer structure is used by the page cache buffer management routines
+ * The buffer structure is used on a temporary basis only, and discarded when
- * to refer to an assembly of pages forming a logical buffer.  The actual I/O
+ * released.  The real data storage is recorded in the pagecache. Buffers are
- * is performed with buffer_head structures, as required by drivers.
- * 
- * The buffer structure is used on temporary basis only, and discarded when
- * released.  The real data storage is recorded in the page cache.  Metadata is
 * hashed to the block device on which the file system resides.
 */
 struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
+#define XB_PAGES        2
-typedef void (*page_buf_iodone_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
-typedef void (*page_buf_relse_t)(struct xfs_buf *);
-/* pre-write function */
-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
-#define PB_PAGES        2
 typedef struct xfs_buf {
-        struct semaphore        pb_sema;        /* semaphore for lockables  */
+        struct semaphore        b_sema;         /* semaphore for lockables */
-        unsigned long           pb_queuetime;   /* time buffer was queued   */
+        unsigned long           b_queuetime;    /* time buffer was queued */
-        atomic_t                pb_pin_count;   /* pin count                */
+        atomic_t                b_pin_count;    /* pin count */
-        wait_queue_head_t       pb_waiters;     /* unpin waiters            */
+        wait_queue_head_t       b_waiters;      /* unpin waiters */
-        struct list_head        pb_list;
+        struct list_head        b_list;
-        page_buf_flags_t        pb_flags;       /* status flags */
+        xfs_buf_flags_t         b_flags;        /* status flags */
-        struct list_head        pb_hash_list;   /* hash table list */
+        struct list_head        b_hash_list;    /* hash table list */
-        xfs_bufhash_t           *pb_hash;       /* hash table list start */
+        xfs_bufhash_t           *b_hash;        /* hash table list start */
-        xfs_buftarg_t           *pb_target;     /* buffer target (device) */
+        xfs_buftarg_t           *b_target;      /* buffer target (device) */
-        atomic_t                pb_hold;        /* reference count */
+        atomic_t                b_hold;         /* reference count */
-        xfs_daddr_t             pb_bn;          /* block number for I/O */
+        xfs_daddr_t             b_bn;           /* block number for I/O */
-        loff_t                  pb_file_offset; /* offset in file */
+        xfs_off_t               b_file_offset;  /* offset in file */
-        size_t                  pb_buffer_length; /* size of buffer in bytes */
+        size_t                  b_buffer_length;/* size of buffer in bytes */
-        size_t                  pb_count_desired; /* desired transfer size */
+        size_t                  b_count_desired;/* desired transfer size */
-        void                    *pb_addr;       /* virtual address of buffer */
+        void                    *b_addr;        /* virtual address of buffer */
-        struct work_struct      pb_iodone_work;
+        struct work_struct      b_iodone_work;
-        atomic_t                pb_io_remaining;/* #outstanding I/O requests */
+        atomic_t                b_io_remaining; /* #outstanding I/O requests */
-        page_buf_iodone_t       pb_iodone;      /* I/O completion function */
+        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
-        page_buf_relse_t        pb_relse;       /* releasing function */
+        xfs_buf_relse_t         b_relse;        /* releasing function */
-        page_buf_bdstrat_t      pb_strat;       /* pre-write function */
+        xfs_buf_bdstrat_t       b_strat;        /* pre-write function */
-        struct semaphore        pb_iodonesema;  /* Semaphore for I/O waiters */
+        struct semaphore        b_iodonesema;   /* Semaphore for I/O waiters */
-        void                    *pb_fspriv;
+        void                    *b_fspriv;
-        void                    *pb_fspriv2;
+        void                    *b_fspriv2;
-        void                    *pb_fspriv3;
+        void                    *b_fspriv3;
-        unsigned short          pb_error;       /* error code on I/O */
+        unsigned short          b_error;        /* error code on I/O */
-        unsigned short          pb_locked;      /* page array is locked */
+        unsigned short          b_locked;       /* page array is locked */
-        unsigned int            pb_page_count;  /* size of page array */
+        unsigned int            b_page_count;   /* size of page array */
-        unsigned int            pb_offset;      /* page offset in first page */
+        unsigned int            b_offset;       /* page offset in first page */
-        struct page             **pb_pages;     /* array of page pointers */
+        struct page             **b_pages;      /* array of page pointers */
-        struct page             *pb_page_array[PB_PAGES]; /* inline pages */
+        struct page             *b_page_array[XB_PAGES]; /* inline pages */
-#ifdef PAGEBUF_LOCK_TRACKING
+#ifdef XFS_BUF_LOCK_TRACKING
-        int                     pb_last_holder;
+        int                     b_last_holder;
 #endif
 } xfs_buf_t;
 /* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-extern xfs_buf_t *_pagebuf_find(        /* find buffer for block if     */
+                                xfs_buf_flags_t, xfs_buf_t *);
-                                        /* the block is in memory       */
-                xfs_buftarg_t *,        /* inode for block              */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t,       /* PBF_LOCK                     */
-                xfs_buf_t *);           /* newly allocated buffer       */
 #define xfs_incore(buftarg,blkno,len,lockit) \
-        _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
+        _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-extern xfs_buf_t *xfs_buf_get_flags(    /* allocate a buffer            */
-                xfs_buftarg_t *,        /* inode for buffer             */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_READ,          */
-                                        /* PBF_ASYNC                    */
+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 #define xfs_buf_get(target, blkno, len, flags) \
-        xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+        xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-extern xfs_buf_t *xfs_buf_read_flags(   /* allocate and read a buffer   */
-                xfs_buftarg_t *,        /* inode for buffer             */
-                loff_t,                 /* starting offset of range     */
-                size_t,                 /* length of range              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_ASYNC          */
+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 #define xfs_buf_read(target, blkno, len, flags) \
-        xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+        xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-extern xfs_buf_t *pagebuf_get_empty(    /* allocate pagebuf struct with */
-                                        /*  no memory or disk address   */
-                size_t len,
-                xfs_buftarg_t *);       /* mount point "fake" inode     */
-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct       */
-                                        /* without disk address         */
-                size_t len,
-                xfs_buftarg_t *);       /* mount point "fake" inode     */
-extern int pagebuf_associate_memory(
-                xfs_buf_t *,
-                void *,
-                size_t);
-extern void pagebuf_hold(               /* increment reference count    */
-                xfs_buf_t *);           /* buffer to hold               */
-extern void pagebuf_readahead(          /* read ahead into cache        */
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-                xfs_buftarg_t  *,       /* target for buffer (or NULL)  */
+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
-                loff_t,                 /* starting offset of range     */
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-                size_t,                 /* length of range              */
+extern void xfs_buf_hold(xfs_buf_t *);
-                page_buf_flags_t);      /* additional read flags        */
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
+                                xfs_buf_flags_t);
 /* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
-extern void pagebuf_free(               /* deallocate a buffer          */
+extern void xfs_buf_rele(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to deallocate         */
-extern void pagebuf_rele(               /* release hold on a buffer     */
-                xfs_buf_t *);           /* buffer to release            */
 /* Locking and Unlocking Buffers */
+extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int pagebuf_cond_lock(           /* lock buffer, if not locked   */
+extern int xfs_buf_lock_value(xfs_buf_t *);
-                                        /* (returns -EBUSY if locked)   */
+extern void xfs_buf_lock(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to lock               */
+extern void xfs_buf_unlock(xfs_buf_t *);
-extern int pagebuf_lock_value(          /* return count on lock         */
-                xfs_buf_t *);          /* buffer to check              */
-extern int pagebuf_lock(                /* lock buffer                  */
-                xfs_buf_t *);          /* buffer to lock               */
-extern void pagebuf_unlock(             /* unlock buffer                */
-                xfs_buf_t *);           /* buffer to unlock             */
 /* Buffer Read and Write Routines */
+extern void xfs_buf_ioend(xfs_buf_t *,  int);
-extern void pagebuf_iodone(             /* mark buffer I/O complete     */
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
-                xfs_buf_t *,            /* buffer to mark               */
+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
-                int);                   /* run completion locally, or in
+extern int xfs_buf_iorequest(xfs_buf_t *);
-                                         * a helper thread.             */
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
-extern void pagebuf_ioerror(            /* mark buffer in error (or not) */
+                                xfs_buf_rw_t);
-                xfs_buf_t *,            /* buffer to mark               */
-                int);                   /* error to store (0 if none)   */
+static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
-extern int pagebuf_iostart(             /* start I/O on a buffer        */
-                xfs_buf_t *,            /* buffer to start              */
-                page_buf_flags_t);      /* PBF_LOCK, PBF_ASYNC,         */
-                                        /* PBF_READ, PBF_WRITE,         */
-                                        /* PBF_DELWRI                   */
-extern int pagebuf_iorequest(           /* start real I/O               */
-                xfs_buf_t *);           /* buffer to convey to device   */
-extern int pagebuf_iowait(              /* wait for buffer I/O done     */
-                xfs_buf_t *);           /* buffer to wait on            */
-extern void pagebuf_iomove(             /* move data in/out of pagebuf  */
-                xfs_buf_t *,            /* buffer to manipulate         */
-                size_t,                 /* starting buffer offset       */
-                size_t,                 /* length in buffer             */
-                caddr_t,                /* data pointer                 */
-                page_buf_rw_t);         /* direction                    */
-static inline int pagebuf_iostrategy(xfs_buf_t *pb)
 {
-        return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+        return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
 }
-static inline int pagebuf_geterror(xfs_buf_t *pb)
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
 {
-        return pb ? pb->pb_error : ENOMEM;
+        return bp ? bp->b_error : ENOMEM;
 }
 /* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-extern caddr_t pagebuf_offset(          /* pointer at offset in buffer  */
-                xfs_buf_t *,            /* buffer to offset into        */
-                size_t);                /* offset                       */
 /* Pinning Buffer Storage in Memory */
+extern void xfs_buf_pin(xfs_buf_t *);
-extern void pagebuf_pin(                /* pin buffer in memory         */
+extern void xfs_buf_unpin(xfs_buf_t *);
-                xfs_buf_t *);           /* buffer to pin                */
+extern int xfs_buf_ispin(xfs_buf_t *);
-extern void pagebuf_unpin(              /* unpin buffered data          */
-                xfs_buf_t *);           /* buffer to unpin              */
-extern int pagebuf_ispin(               /* check if buffer is pinned    */
-                xfs_buf_t *);           /* buffer to check              */
 /* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void pagebuf_delwri_dequeue(xfs_buf_t *);
 /* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
-extern int pagebuf_init(void);
+#ifdef XFS_BUF_TRACE
-extern void pagebuf_terminate(void);
+extern ktrace_t *xfs_buf_trace_buf;
+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
-#ifdef PAGEBUF_TRACE
-extern ktrace_t *pagebuf_trace_buf;
-extern void pagebuf_trace(
-                xfs_buf_t *,            /* buffer being traced          */
-                char *,                 /* description of operation     */
-                void *,                 /* arbitrary diagnostic value   */
-                void *);                /* return address               */
 #else
-# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
+#define xfs_buf_trace(bp,id,ptr,ra)     do { } while (0)
 #endif
-#define pagebuf_target_name(target)     \
+#define xfs_buf_target_name(target)     \
-        ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+        ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
+#define XFS_B_ASYNC             XBF_ASYNC
+#define XFS_B_DELWRI            XBF_DELWRI
+#define XFS_B_READ              XBF_READ
+#define XFS_B_WRITE             XBF_WRITE
+#define XFS_B_STALE             XBF_STALE
-/* These are just for xfs_syncsub... it sets an internal variable
+#define XFS_BUF_TRYLOCK         XBF_TRYLOCK
- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
+#define XFS_INCORE_TRYLOCK      XBF_TRYLOCK
- */
+#define XFS_BUF_LOCK            XBF_LOCK
-#define XFS_B_ASYNC             PBF_ASYNC
+#define XFS_BUF_MAPPED          XBF_MAPPED
-#define XFS_B_DELWRI            PBF_DELWRI
-#define XFS_B_READ              PBF_READ
-#define XFS_B_WRITE             PBF_WRITE
-#define XFS_B_STALE             PBF_STALE
-#define XFS_BUF_TRYLOCK         PBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK      PBF_TRYLOCK
-#define XFS_BUF_LOCK            PBF_LOCK
-#define XFS_BUF_MAPPED          PBF_MAPPED
-#define BUF_BUSY                PBF_DONT_BLOCK
-#define XFS_BUF_BFLAGS(x)       ((x)->pb_flags)
-#define XFS_BUF_ZEROFLAGS(x)    \
-        ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
-#define XFS_BUF_STALE(x)        ((x)->pb_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(x)      ((x)->pb_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(x)      ((x)->pb_flags & XFS_B_STALE)
-#define XFS_BUF_SUPER_STALE(x)  do {                            \
-                                        XFS_BUF_STALE(x);       \
-                                        pagebuf_delwri_dequeue(x);      \
-                                        XFS_BUF_DONE(x);        \
-                                } while (0)
-#define XFS_BUF_MANAGE          PBF_FS_MANAGED
+#define BUF_BUSY                XBF_DONT_BLOCK
-#define XFS_BUF_UNMANAGE(x)     ((x)->pb_flags &= ~PBF_FS_MANAGED)
+#define XFS_BUF_BFLAGS(bp)      ((bp)->b_flags)
-#define XFS_BUF_DELAYWRITE(x)    ((x)->pb_flags |= PBF_DELWRI)
+#define XFS_BUF_ZEROFLAGS(bp)   \
-#define XFS_BUF_UNDELAYWRITE(x)  pagebuf_delwri_dequeue(x)
+        ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
-#define XFS_BUF_ISDELAYWRITE(x)  ((x)->pb_flags & PBF_DELWRI)
+#define XFS_BUF_STALE(bp)       ((bp)->b_flags |= XFS_B_STALE)
-#define XFS_BUF_ERROR(x,no)      pagebuf_ioerror(x,no)
+#define XFS_BUF_UNSTALE(bp)     ((bp)->b_flags &= ~XFS_B_STALE)
-#define XFS_BUF_GETERROR(x)      pagebuf_geterror(x)
+#define XFS_BUF_ISSTALE(bp)     ((bp)->b_flags & XFS_B_STALE)
-#define XFS_BUF_ISERROR(x)       (pagebuf_geterror(x)?1:0)
+#define XFS_BUF_SUPER_STALE(bp) do {                            \
+                                        XFS_BUF_STALE(bp);      \
-#define XFS_BUF_DONE(x)          ((x)->pb_flags |= PBF_DONE)
+                                        xfs_buf_delwri_dequeue(bp);     \
-#define XFS_BUF_UNDONE(x)        ((x)->pb_flags &= ~PBF_DONE)
+                                        XFS_BUF_DONE(bp);       \
-#define XFS_BUF_ISDONE(x)        ((x)->pb_flags & PBF_DONE)
+                                } while (0)
-#define XFS_BUF_BUSY(x)          do { } while (0)
-#define XFS_BUF_UNBUSY(x)        do { } while (0)
-#define XFS_BUF_ISBUSY(x)        (1)
-#define XFS_BUF_ASYNC(x)         ((x)->pb_flags |= PBF_ASYNC)
-#define XFS_BUF_UNASYNC(x)       ((x)->pb_flags &= ~PBF_ASYNC)
-#define XFS_BUF_ISASYNC(x)       ((x)->pb_flags & PBF_ASYNC)
-#define XFS_BUF_ORDERED(x)       ((x)->pb_flags |= PBF_ORDERED)
-#define XFS_BUF_UNORDERED(x)     ((x)->pb_flags &= ~PBF_ORDERED)
-#define XFS_BUF_ISORDERED(x)     ((x)->pb_flags & PBF_ORDERED)
-#define XFS_BUF_SHUT(x)          printk("XFS_BUF_SHUT not implemented yet\n")
-#define XFS_BUF_UNSHUT(x)        printk("XFS_BUF_UNSHUT not implemented yet\n")
-#define XFS_BUF_ISSHUT(x)        (0)
-#define XFS_BUF_HOLD(x)         pagebuf_hold(x)
-#define XFS_BUF_READ(x)         ((x)->pb_flags |= PBF_READ)
-#define XFS_BUF_UNREAD(x)       ((x)->pb_flags &= ~PBF_READ)
-#define XFS_BUF_ISREAD(x)       ((x)->pb_flags & PBF_READ)
-#define XFS_BUF_WRITE(x)        ((x)->pb_flags |= PBF_WRITE)
-#define XFS_BUF_UNWRITE(x)      ((x)->pb_flags &= ~PBF_WRITE)
-#define XFS_BUF_ISWRITE(x)      ((x)->pb_flags & PBF_WRITE)
-#define XFS_BUF_ISUNINITIAL(x)   (0)
-#define XFS_BUF_UNUNINITIAL(x)   (0)
-#define XFS_BUF_BP_ISMAPPED(bp)  1
-#define XFS_BUF_IODONE_FUNC(buf)        (buf)->pb_iodone
-#define XFS_BUF_SET_IODONE_FUNC(buf, func)      \
-                        (buf)->pb_iodone = (func)
-#define XFS_BUF_CLR_IODONE_FUNC(buf)            \
-                        (buf)->pb_iodone = NULL
-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func)     \
-                        (buf)->pb_strat = (func)
-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf)           \
-                        (buf)->pb_strat = NULL
-#define XFS_BUF_FSPRIVATE(buf, type)            \
-                        ((type)(buf)->pb_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(buf, value)       \
-                        (buf)->pb_fspriv = (void *)(value)
-#define XFS_BUF_FSPRIVATE2(buf, type)           \
-                        ((type)(buf)->pb_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(buf, value)      \
-                        (buf)->pb_fspriv2 = (void *)(value)
-#define XFS_BUF_FSPRIVATE3(buf, type)           \
-                        ((type)(buf)->pb_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(buf, value)      \
-                        (buf)->pb_fspriv3  = (void *)(value)
-#define XFS_BUF_SET_START(buf)
-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
-                        (buf)->pb_relse = (value)
-#define XFS_BUF_PTR(bp)         (xfs_caddr_t)((bp)->pb_addr)
-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
-{
-        if (bp->pb_flags & PBF_MAPPED)
-                return XFS_BUF_PTR(bp) + offset;
-        return (xfs_caddr_t) pagebuf_offset(bp, offset);
-}
-#define XFS_BUF_SET_PTR(bp, val, count)         \
+#define XFS_BUF_MANAGE          XBF_FS_MANAGED
-                                pagebuf_associate_memory(bp, val, count)
+#define XFS_BUF_UNMANAGE(bp)    ((bp)->b_flags &= ~XBF_FS_MANAGED)
-#define XFS_BUF_ADDR(bp)        ((bp)->pb_bn)
-#define XFS_BUF_SET_ADDR(bp, blk)               \
+#define XFS_BUF_DELAYWRITE(bp)          ((bp)->b_flags |= XBF_DELWRI)
-                        ((bp)->pb_bn = (xfs_daddr_t)(blk))
+#define XFS_BUF_UNDELAYWRITE(bp)        xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_OFFSET(bp)      ((bp)->pb_file_offset)
+#define XFS_BUF_ISDELAYWRITE(bp)        ((bp)->b_flags & XBF_DELWRI)
-#define XFS_BUF_SET_OFFSET(bp, off)             \
-                        ((bp)->pb_file_offset = (off))
+#define XFS_BUF_ERROR(bp,no)    xfs_buf_ioerror(bp,no)
-#define XFS_BUF_COUNT(bp)       ((bp)->pb_count_desired)
+#define XFS_BUF_GETERROR(bp)    xfs_buf_geterror(bp)
-#define XFS_BUF_SET_COUNT(bp, cnt)              \
+#define XFS_BUF_ISERROR(bp)     (xfs_buf_geterror(bp) ? 1 : 0)
-                        ((bp)->pb_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)        ((bp)->pb_buffer_length)
+#define XFS_BUF_DONE(bp)        ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_SET_SIZE(bp, cnt)               \
+#define XFS_BUF_UNDONE(bp)      ((bp)->b_flags &= ~XBF_DONE)
-                        ((bp)->pb_buffer_length = (cnt))
+#define XFS_BUF_ISDONE(bp)      ((bp)->b_flags & XBF_DONE)
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_BUSY(bp)        do { } while (0)
-#define XFS_BUF_SET_REF(bp, ref)
+#define XFS_BUF_UNBUSY(bp)      do { } while (0)
+#define XFS_BUF_ISBUSY(bp)      (1)
-#define XFS_BUF_ISPINNED(bp)    pagebuf_ispin(bp)
+#define XFS_BUF_ASYNC(bp)       ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_VALUSEMA(bp)    pagebuf_lock_value(bp)
+#define XFS_BUF_UNASYNC(bp)     ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_CPSEMA(bp)      (pagebuf_cond_lock(bp) == 0)
+#define XFS_BUF_ISASYNC(bp)     ((bp)->b_flags & XBF_ASYNC)
-#define XFS_BUF_VSEMA(bp)       pagebuf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x)     pagebuf_lock(bp)
+#define XFS_BUF_ORDERED(bp)     ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
+#define XFS_BUF_UNORDERED(bp)   ((bp)->b_flags &= ~XBF_ORDERED)
+#define XFS_BUF_ISORDERED(bp)   ((bp)->b_flags & XBF_ORDERED)
-/* setup the buffer target from a buftarg structure */
-#define XFS_BUF_SET_TARGET(bp, target)  \
+#define XFS_BUF_SHUT(bp)        do { } while (0)
-                (bp)->pb_target = (target)
+#define XFS_BUF_UNSHUT(bp)      do { } while (0)
-#define XFS_BUF_TARGET(bp)      ((bp)->pb_target)
+#define XFS_BUF_ISSHUT(bp)      (0)
-#define XFS_BUFTARG_NAME(target)        \
-                pagebuf_target_name(target)
+#define XFS_BUF_HOLD(bp)        xfs_buf_hold(bp)
+#define XFS_BUF_READ(bp)        ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_UNREAD(bp)      ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_ISREAD(bp)      ((bp)->b_flags & XBF_READ)
-#define XFS_BUF_SET_REF(bp, ref)
+#define XFS_BUF_WRITE(bp)       ((bp)->b_flags |= XBF_WRITE)
-static inline int       xfs_bawrite(void *mp, xfs_buf_t *bp)
+#define XFS_BUF_UNWRITE(bp)     ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)     ((bp)->b_flags & XBF_WRITE)
+#define XFS_BUF_ISUNINITIAL(bp) (0)
+#define XFS_BUF_UNUNINITIAL(bp) (0)
+#define XFS_BUF_BP_ISMAPPED(bp) (1)
+#define XFS_BUF_IODONE_FUNC(bp)                 ((bp)->b_iodone)
+#define XFS_BUF_SET_IODONE_FUNC(bp, func)       ((bp)->b_iodone = (func))
+#define XFS_BUF_CLR_IODONE_FUNC(bp)             ((bp)->b_iodone = NULL)
+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func)      ((bp)->b_strat = (func))
+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp)            ((bp)->b_strat = NULL)
+#define XFS_BUF_FSPRIVATE(bp, type)             ((type)(bp)->b_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(bp, val)          ((bp)->b_fspriv = (void*)(val))
+#define XFS_BUF_FSPRIVATE2(bp, type)            ((type)(bp)->b_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(bp, val)         ((bp)->b_fspriv2 = (void*)(val))
+#define XFS_BUF_FSPRIVATE3(bp, type)            ((type)(bp)->b_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(bp, val)         ((bp)->b_fspriv3 = (void*)(val))
+#define XFS_BUF_SET_START(bp)                   do { } while (0)
+#define XFS_BUF_SET_BRELSE_FUNC(bp, func)       ((bp)->b_relse = (func))
+/* Buffer's b_addr as an xfs_caddr_t; fully parenthesized so it composes safely. */
+#define XFS_BUF_PTR(bp)                 ((xfs_caddr_t)((bp)->b_addr))
+#define XFS_BUF_SET_PTR(bp, val, cnt)   xfs_buf_associate_memory(bp, val, cnt)
+#define XFS_BUF_ADDR(bp)                ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)       ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)              ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)     ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)               ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)      ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)                ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)       ((bp)->b_buffer_length = (cnt))
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)    do { } while (0)
+#define XFS_BUF_SET_VTYPE(bp, type)             do { } while (0)
+#define XFS_BUF_SET_REF(bp, ref)                do { } while (0)
+#define XFS_BUF_ISPINNED(bp)    xfs_buf_ispin(bp)
+#define XFS_BUF_VALUSEMA(bp)    xfs_buf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp)      (xfs_buf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp)       xfs_buf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x)     xfs_buf_lock(bp)
+/* up() the buffer's b_iodonesema; trailing ';' kept for existing call sites. */
+#define XFS_BUF_V_IODONESEMA(bp) up(&(bp)->b_iodonesema);
+#define XFS_BUF_SET_TARGET(bp, target)  ((bp)->b_target = (target))
+#define XFS_BUF_TARGET(bp)              ((bp)->b_target)
+#define XFS_BUFTARG_NAME(target)        xfs_buf_target_name(target)
+static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
 {
-        bp->pb_fspriv3 = mp;
+        bp->b_fspriv3 = mp;
-        bp->pb_strat = xfs_bdstrat_cb;
+        bp->b_strat = xfs_bdstrat_cb;
-        pagebuf_delwri_dequeue(bp);
+        xfs_buf_delwri_dequeue(bp);
-        return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+        return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
 }
-static inline void      xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(xfs_buf_t *bp)
 {
-        if (!bp->pb_relse)
+        if (!bp->b_relse)
-                pagebuf_unlock(bp);
+                xfs_buf_unlock(bp);
-        pagebuf_rele(bp);
+        xfs_buf_rele(bp);
 }
-#define xfs_bpin(bp)            pagebuf_pin(bp)
+#define xfs_bpin(bp)            xfs_buf_pin(bp)
-#define xfs_bunpin(bp)          pagebuf_unpin(bp)
+#define xfs_bunpin(bp)          xfs_buf_unpin(bp)
 #define xfs_buftrace(id, bp)    \
-            pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+            xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
-#define xfs_biodone(pb)             \
+#define xfs_biodone(bp)         xfs_buf_ioend(bp, 0)
-            pagebuf_iodone(pb, 0)
-#define xfs_biomove(pb, off, len, data, rw) \
+#define xfs_biomove(bp, off, len, data, rw) \
-            pagebuf_iomove((pb), (off), (len), (data), \
+            xfs_buf_iomove((bp), (off), (len), (data), \
-                ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+                ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
-#define xfs_biozero(pb, off, len) \
+#define xfs_biozero(bp, off, len) \
-            pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-static inline int       XFS_bwrite(xfs_buf_t *pb)
+static inline int XFS_bwrite(xfs_buf_t *bp)
 {
-        int     iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+        int     iowait = (bp->b_flags & XBF_ASYNC) == 0;
        int     error = 0;
        if (!iowait)
-                pb->pb_flags |= _PBF_RUN_QUEUES;
+                bp->b_flags |= _XBF_RUN_QUEUES;
-        pagebuf_delwri_dequeue(pb);
+        xfs_buf_delwri_dequeue(bp);
-        pagebuf_iostrategy(pb);
+        xfs_buf_iostrategy(bp);
        if (iowait) {
-                error = pagebuf_iowait(pb);
+                error = xfs_buf_iowait(bp);
-                xfs_buf_relse(pb);
+                xfs_buf_relse(bp);
        }
        return error;
 }
-#define XFS_bdwrite(pb)              \
+#define XFS_bdwrite(bp)         xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
-            pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
 static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
 {
-        bp->pb_strat = xfs_bdstrat_cb;
+        bp->b_strat = xfs_bdstrat_cb;
-        bp->pb_fspriv3 = mp;
+        bp->b_fspriv3 = mp;
+        return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
-        return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
 }
-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
-#define xfs_iowait(pb)  pagebuf_iowait(pb)
+#define xfs_iowait(bp)  xfs_buf_iowait(bp)
 #define xfs_baread(target, rablkno, ralen)  \
-        pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
+        xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-#define xfs_buf_get_empty(len, target)  pagebuf_get_empty((len), (target))
-#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
-#define xfs_buf_free(bp)                pagebuf_free(bp)
 /*
 *      Handling of buftargs.
 */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
 extern void xfs_free_buftarg(xfs_buftarg_t *, int);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-#define xfs_getsize_buftarg(buftarg) \
+#define xfs_getsize_buftarg(buftarg)    block_size((buftarg)->bt_bdev)
-        block_size((buftarg)->pbr_bdev)
+#define xfs_readonly_buftarg(buftarg)   bdev_read_only((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg) \
-        bdev_read_only((buftarg)->pbr_bdev)
+#define xfs_binval(buftarg)             xfs_flush_buftarg(buftarg, 1)
-#define xfs_binval(buftarg) \
+#define XFS_bflush(buftarg)             xfs_flush_buftarg(buftarg, 1)
-        xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg) \
-        xfs_flush_buftarg(buftarg, 1)
 #endif  /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 4af491024727..e7f3da61c6c3 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -18,6 +18,8 @@
 #ifndef __XFS_CRED_H__
 #define __XFS_CRED_H__
+#include <linux/capability.h>
 /*
 * Credentials
 */
@@ -27,7 +29,7 @@ typedef struct cred {
 extern struct cred *sys_cred;
-/* this is a hack.. (assums sys_cred is the only cred_t in the system) */
+/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
 static __inline int capable_cred(cred_t *cr, int cid)
 {
        return (cr == sys_cred) ? 1 : capable(cid);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 06111d0bbae4..ced4404339c7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -509,16 +509,14 @@ linvfs_open_exec(
        vnode_t         *vp = LINVFS_GET_VP(inode);
        xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
        int             error = 0;
-        bhv_desc_t      *bdp;
        xfs_inode_t     *ip;
        if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-                bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+                ip = xfs_vtoi(vp);
-                if (!bdp) {
+                if (!ip) {
                        error = -EINVAL;
                        goto open_exec_out;
                }
-                ip = XFS_BHVTOI(bdp);
                if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
                        error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
                                               0, 0, 0, NULL);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index f89340c61bf2..4fa4b1a5187e 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,8 +79,7 @@ fs_flushinval_pages(
        struct inode    *ip = LINVFS_GET_IP(vp);
        if (VN_CACHED(vp)) {
-                filemap_fdatawrite(ip->i_mapping);
+                filemap_write_and_wait(ip->i_mapping);
-                filemap_fdatawait(ip->i_mapping);
                truncate_inode_pages(ip->i_mapping, first);
        }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b78b5eb9e96c..4db47790415c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -52,6 +52,7 @@
 #include "xfs_dfrag.h"
 #include "xfs_fsops.h"
+#include <linux/capability.h>
 #include <linux/dcache.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
@@ -145,13 +146,10 @@ xfs_find_handle(
        if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
                xfs_inode_t     *ip;
-                bhv_desc_t      *bhv;
                int             lock_mode;
                /* need to get access to the xfs_inode to read the generation */
-                bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
+                ip = xfs_vtoi(vp);
-                ASSERT(bhv);
-                ip = XFS_BHVTOI(bhv);
                ASSERT(ip);
                lock_mode = xfs_ilock_map_shared(ip);
@@ -530,6 +528,8 @@ xfs_attrmulti_attr_set(
        char                    *kbuf;
        int                     error = EFAULT;
+        if (IS_RDONLY(&vp->v_inode))
+                return -EROFS;
        if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
                return EPERM;
        if (len > XATTR_SIZE_MAX)
@@ -557,6 +557,9 @@ xfs_attrmulti_attr_remove(
 {
        int                     error;
+        if (IS_RDONLY(&vp->v_inode))
+                return -EROFS;
        if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
                return EPERM;
@@ -745,9 +748,8 @@ xfs_ioctl(
                        (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                        mp->m_rtdev_targp : mp->m_ddev_targp;
-                da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
+                da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-                /* The size dio will do in one go */
+                da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-                da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
                if (copy_to_user(arg, &da, sizeof(da)))
                        return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c83ae15bb0e6..a7c9ba1a9f7b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -19,7 +19,6 @@
 #include <linux/compat.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
-#include <linux/ioctl32.h>
 #include <linux/syscalls.h>
 #include <linux/types.h>
 #include <linux/fs.h>
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 14215a7db59f..76c6df34d0db 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -51,8 +51,44 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
+#include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/namei.h>
+#include <linux/security.h>
+/*
+ * Get an XFS inode from a given vnode.
+ *
+ * Calls bhv_lookup_range() on VN_BHV_HEAD(vp) with both range bounds set
+ * to VNODE_POSITION_XFS and converts the resulting descriptor with
+ * XFS_BHVTOI().  Returns NULL when the lookup yields no descriptor, so
+ * callers must check the result before dereferencing it.
+ */
+xfs_inode_t *
+xfs_vtoi(
+        struct vnode    *vp)
+{
+        bhv_desc_t      *bdp;
+        bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
+                        VNODE_POSITION_XFS, VNODE_POSITION_XFS);
+        if (unlikely(bdp == NULL))
+                return NULL;
+        return XFS_BHVTOI(bdp);
+}
+/*
+ * Bring the atime in the XFS inode up to date.
+ * Used before logging the inode to disk or when the Linux inode goes away.
+ *
+ * Copies i_atime (tv_sec and tv_nsec, each cast to __int32_t) from the
+ * Linux inode embedded in the vnode (vp->v_inode) into ip->i_d.di_atime.
+ * Does nothing when XFS_ITOV_NULL(ip) returns no vnode.
+ */
+void
+xfs_synchronize_atime(
+        xfs_inode_t     *ip)
+{
+        vnode_t         *vp;
+        vp = XFS_ITOV_NULL(ip);
+        if (vp) {
+                struct inode *inode = &vp->v_inode;
+                ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+                ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+        }
+}
 /*
 * Change the requested timestamp in the given inode.
@@ -73,23 +109,6 @@ xfs_ichgtime(
        struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));
        timespec_t      tv;
-        /*
-         * We're not supposed to change timestamps in readonly-mounted
-         * filesystems.  Throw it away if anyone asks us.
-         */
-        if (unlikely(IS_RDONLY(inode)))
-                return;
-        /*
-         * Don't update access timestamps on reads if mounted "noatime".
-         * Throw it away if anyone asks us.
-         */
-        if (unlikely(
-            (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-            (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-                        XFS_ICHGTIME_ACC))
-                return;
        nanotime(&tv);
        if (flags & XFS_ICHGTIME_MOD) {
                inode->i_mtime = tv;
@@ -126,8 +145,6 @@ xfs_ichgtime(
 * Variant on the above which avoids querying the system clock
 * in situations where we know the Linux inode timestamps have
 * just been updated (and so we can update our inode cheaply).
- * We also skip the readonly and noatime checks here, they are
- * also catered for already.
 */
 void
 xfs_ichgtime_fast(
@@ -138,20 +155,16 @@ xfs_ichgtime_fast(
        timespec_t      *tvp;
        /*
-         * We're not supposed to change timestamps in readonly-mounted
+         * Atime updates for read() & friends are handled lazily now, and
-         * filesystems.  Throw it away if anyone asks us.
+         * explicit updates must go through xfs_ichgtime()
         */
-        if (unlikely(IS_RDONLY(inode)))
+        ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-                return;
        /*
-         * Don't update access timestamps on reads if mounted "noatime".
+         * We're not supposed to change timestamps in readonly-mounted
-         * Throw it away if anyone asks us.
+         * filesystems.  Throw it away if anyone asks us.
         */
-        if (unlikely(
+        if (unlikely(IS_RDONLY(inode)))
-            (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-            ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-                        XFS_ICHGTIME_ACC)))
                return;
        if (flags & XFS_ICHGTIME_MOD) {
@@ -159,11 +172,6 @@ xfs_ichgtime_fast(
                ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
                ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
        }
-        if (flags & XFS_ICHGTIME_ACC) {
-                tvp = &inode->i_atime;
-                ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
-                ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
-        }
        if (flags & XFS_ICHGTIME_CHG) {
                tvp = &inode->i_ctime;
                ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -203,13 +211,46 @@ validate_fields(
                ip->i_nlink = va.va_nlink;
                ip->i_blocks = va.va_nblocks;
-                /* we're under i_sem so i_size can't change under us */
+                /* we're under i_mutex so i_size can't change under us */
                if (i_size_read(ip) != va.va_size)
                        i_size_write(ip, va.va_size);
        }
 }
 /*
+ * Hook in SELinux.  This is not quite correct yet; what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ *
+ * Obtains a name/value pair from security_inode_init_security() for the
+ * Linux inode behind vp (created in directory dir) and stores it with
+ * VOP_ATTR_SET() using ATTR_SECURE; VMODIFY(vp) is invoked only when
+ * that set reports no error.
+ *
+ * Returns 0 on success and also when the security hook reports
+ * -EOPNOTSUPP.  Any other hook failure is returned with its sign
+ * flipped (-error); the result of VOP_ATTR_SET() is returned unchanged.
+ * name and value are passed to kfree() after the set attempt whether or
+ * not it succeeded; they are not freed on the early-return paths.
+ */
+STATIC int
+linvfs_init_security(
+        struct vnode    *vp,
+        struct inode    *dir)
+{
+        struct inode    *ip = LINVFS_GET_IP(vp);
+        size_t          length;
+        void            *value;
+        char            *name;
+        int             error;
+        error = security_inode_init_security(ip, dir, &name, &value, &length);
+        if (error) {
+                if (error == -EOPNOTSUPP)
+                        return 0;
+                return -error;
+        }
+        VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
+        if (!error)
+                VMODIFY(vp);
+        kfree(name);
+        kfree(value);
+        return error;
+}
+/*
 * Determine whether a process has a valid fs_struct (kernel daemons
 * like knfsd don't have an fs_struct).
 *
@@ -274,6 +315,9 @@ linvfs_mknod(
                break;
        }
+        if (!error)
+                error = linvfs_init_security(vp, dir);
        if (default_acl) {
                if (!error) {
                        error = _ACL_INHERIT(vp, &va, default_acl);
@@ -290,8 +334,6 @@ linvfs_mknod(
                                teardown.d_inode = ip = LINVFS_GET_IP(vp);
                                teardown.d_name = dentry->d_name;
-                                vn_mark_bad(vp);
-                                
                                if (S_ISDIR(mode))
                                        VOP_RMDIR(dvp, &teardown, NULL, err2);
                                else
@@ -429,11 +471,14 @@ linvfs_symlink(
        error = 0;
        VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
-        if (!error && cvp) {
+        if (likely(!error && cvp)) {
-                ip = LINVFS_GET_IP(cvp);
+                error = linvfs_init_security(cvp, dir);
-                d_instantiate(dentry, ip);
+                if (likely(!error)) {
-                validate_fields(dir);
+                        ip = LINVFS_GET_IP(cvp);
-                validate_fields(ip); /* size needs update */
+                        d_instantiate(dentry, ip);
+                        validate_fields(dir);
+                        validate_fields(ip);
+                }
        }
        return -error;
 }
@@ -502,7 +547,7 @@ linvfs_follow_link(
        ASSERT(dentry);
        ASSERT(nd);
-        link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+        link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
        if (!link) {
                nd_set_link(nd, ERR_PTR(-ENOMEM));
                return NULL;
@@ -518,12 +563,12 @@ linvfs_follow_link(
        vp = LINVFS_GET_VP(dentry->d_inode);
        iov.iov_base = link;
-        iov.iov_len = MAXNAMELEN;
+        iov.iov_len = MAXPATHLEN;
        uio->uio_iov = &iov;
        uio->uio_offset = 0;
        uio->uio_segflg = UIO_SYSSPACE;
-        uio->uio_resid = MAXNAMELEN;
+        uio->uio_resid = MAXPATHLEN;
        uio->uio_iovcnt = 1;
        VOP_READLINK(vp, uio, 0, NULL, error);
@@ -531,7 +576,7 @@ linvfs_follow_link(
                kfree(link);
                link = ERR_PTR(-error);
        } else {
-                link[MAXNAMELEN - uio->uio_resid] = '\0';
+                link[MAXPATHLEN - uio->uio_resid] = '\0';
        }
        kfree(uio);
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ee784b63acbf..6899a6b4a50a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
 extern struct file_operations linvfs_invis_file_operations;
 extern struct file_operations linvfs_dir_operations;
-extern struct address_space_operations linvfs_aops;
-extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void linvfs_unwritten_done(struct buffer_head *, int);
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                        int, unsigned int, void __user *);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d8e21ba0cccc..67389b745526 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -110,10 +110,6 @@
 * delalloc and these ondisk-uninitialised buffers.
 */
 BUFFER_FNS(PrivateStart, unwritten);
-static inline void set_buffer_unwritten_io(struct buffer_head *bh)
-{
-        bh->b_end_io = linvfs_unwritten_done;
-}
 #define restricted_chown        xfs_params.restrict_chown.val
 #define irix_sgid_inherit       xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_itruncate_data(ip, off)     \
        (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
 #define xfs_statvfs_fsid(statp, mp)     \
-        ({ u64 id = huge_encode_dev((mp)->m_dev);       \
+        ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
           __kernel_fsid_t *fsid = &(statp)->f_fsid;    \
        (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 279e9bc92aba..e0ab45fbfebd 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -233,8 +233,8 @@ xfs_read(
                xfs_buftarg_t   *target =
                        (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
-                if ((*offset & target->pbr_smask) ||
+                if ((*offset & target->bt_smask) ||
-                    (size & target->pbr_smask)) {
+                    (size & target->bt_smask)) {
                        if (*offset == ip->i_d.di_size) {
                                return (0);
                        }
@@ -254,7 +254,7 @@ xfs_read(
        }
        if (unlikely(ioflags & IO_ISDIRECT))
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
@@ -281,12 +281,9 @@ xfs_read(
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        if (likely(!(ioflags & IO_INVIS)))
-                xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
 unlock_isem:
        if (unlikely(ioflags & IO_ISDIRECT))
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
        return ret;
 }
@@ -346,9 +343,6 @@ xfs_sendfile(
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
-        if (likely(!(ioflags & IO_INVIS)))
-                xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
        return ret;
 }
@@ -362,7 +356,6 @@ STATIC int				/* error (positive) */
 xfs_zero_last_block(
        struct inode    *ip,
        xfs_iocore_t    *io,
-        xfs_off_t       offset,
        xfs_fsize_t     isize,
        xfs_fsize_t     end_size)
 {
@@ -371,19 +364,16 @@ xfs_zero_last_block(
        int             nimaps;
        int             zero_offset;
        int             zero_len;
-        int             isize_fsb_offset;
        int             error = 0;
        xfs_bmbt_irec_t imap;
        loff_t          loff;
-        size_t          lsize;
        ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
-        ASSERT(offset > isize);
        mp = io->io_mount;
-        isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
+        zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-        if (isize_fsb_offset == 0) {
+        if (zero_offset == 0) {
                /*
                 * There are no extra bytes in the last block on disk to
                 * zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
         */
        XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
        loff = XFS_FSB_TO_B(mp, last_fsb);
-        lsize = XFS_FSB_TO_B(mp, 1);
-        zero_offset = isize_fsb_offset;
+        zero_len = mp->m_sb.sb_blocksize - zero_offset;
-        zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
        error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
@@ -447,20 +435,17 @@ xfs_zero_eof(
        struct inode    *ip = LINVFS_GET_IP(vp);
        xfs_fileoff_t   start_zero_fsb;
        xfs_fileoff_t   end_zero_fsb;
-        xfs_fileoff_t   prev_zero_fsb;
        xfs_fileoff_t   zero_count_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_extlen_t    buf_len_fsb;
-        xfs_extlen_t    prev_zero_count;
        xfs_mount_t     *mp;
        int             nimaps;
        int             error = 0;
        xfs_bmbt_irec_t imap;
-        loff_t          loff;
-        size_t          lsize;
        ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
        ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+        ASSERT(offset > isize);
        mp = io->io_mount;
@@ -468,7 +453,7 @@ xfs_zero_eof(
         * First handle zeroing the block on which isize resides.
         * We only zero a part of that block so it is handled specially.
         */
-        error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+        error = xfs_zero_last_block(ip, io, isize, end_size);
        if (error) {
                ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
                ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
        }
        ASSERT(start_zero_fsb <= end_zero_fsb);
-        prev_zero_fsb = NULLFILEOFF;
-        prev_zero_count = 0;
        while (start_zero_fsb <= end_zero_fsb) {
                nimaps = 1;
                zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
                         * that sits on a hole and sets the page as P_HOLE
                         * and calls remapf if it is a mapped file.
                         */
-                        prev_zero_fsb = NULLFILEOFF;
+                        start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-                        prev_zero_count = 0;
-                        start_zero_fsb = imap.br_startoff +
-                                         imap.br_blockcount;
                        ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
                        continue;
                }
@@ -543,17 +523,15 @@ xfs_zero_eof(
                 */
                XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-                loff = XFS_FSB_TO_B(mp, start_zero_fsb);
+                error = xfs_iozero(ip,
-                lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
+                                   XFS_FSB_TO_B(mp, start_zero_fsb),
+                                   XFS_FSB_TO_B(mp, buf_len_fsb),
-                error = xfs_iozero(ip, loff, lsize, end_size);
+                                   end_size);
                if (error) {
                        goto out_lock;
                }
-                prev_zero_fsb = start_zero_fsb;
-                prev_zero_count = buf_len_fsb;
                start_zero_fsb = imap.br_startoff + buf_len_fsb;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
@@ -640,7 +618,7 @@ xfs_write(
                        (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
-                if ((pos & target->pbr_smask) || (count & target->pbr_smask))
+                if ((pos & target->bt_smask) || (count & target->bt_smask))
                        return XFS_ERROR(-EINVAL);
                if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -655,7 +633,7 @@ relock:
                iolock = XFS_IOLOCK_EXCL;
                locktype = VRWLOCK_WRITE;
-                down(&inode->i_sem);
+                mutex_lock(&inode->i_mutex);
        } else {
                iolock = XFS_IOLOCK_SHARED;
                locktype = VRWLOCK_WRITE_DIRECT;
@@ -686,7 +664,7 @@ start:
                int             dmflags = FILP_DELAY_FLAG(file);
                if (need_isem)
-                        dmflags |= DM_FLAGS_ISEM;
+                        dmflags |= DM_FLAGS_IMUX;
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
                error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
@@ -713,7 +691,7 @@ start:
        }
        if (likely(!(ioflags & IO_INVIS))) {
-                inode_update_time(inode, 1);
+                file_update_time(file);
                xfs_ichgtime_fast(xip, inode,
                                  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
@@ -772,7 +750,7 @@ retry:
                if (need_isem) {
                        /* demote the lock now the cached pages are gone */
                        XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                        iolock = XFS_IOLOCK_SHARED;
                        locktype = VRWLOCK_WRITE_DIRECT;
@@ -817,20 +795,24 @@ retry:
                xfs_rwunlock(bdp, locktype);
                if (need_isem)
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
                                DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
                                0, 0, 0); /* Delay flag intentionally  unused */
                if (error)
                        goto out_nounlocks;
                if (need_isem)
-                        down(&inode->i_sem);
+                        mutex_lock(&inode->i_mutex);
                xfs_rwlock(bdp, locktype);
                pos = xip->i_d.di_size;
                ret = 0;
                goto retry;
        }
+        isize = i_size_read(inode);
+        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+                *offset = isize;
        if (*offset > xip->i_d.di_size) {
                xfs_ilock(xip, XFS_ILOCK_EXCL);
                if (*offset > xip->i_d.di_size) {
@@ -926,7 +908,7 @@ retry:
        
                xfs_rwunlock(bdp, locktype);
                if (need_isem)
-                        up(&inode->i_sem);
+                        mutex_unlock(&inode->i_mutex);
                error = sync_page_range(inode, mapping, pos, ret);
                if (!error)
@@ -938,7 +920,7 @@ retry:
        xfs_rwunlock(bdp, locktype);
 out_unlock_isem:
        if (need_isem)
-                up(&inode->i_sem);
+                mutex_unlock(&inode->i_mutex);
 out_nounlocks:
        return -error;
 }
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
        mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
        if (!XFS_FORCED_SHUTDOWN(mp)) {
-                pagebuf_iorequest(bp);
+                xfs_buf_iorequest(bp);
                return 0;
        } else {
                xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
                 * if (XFS_BUF_IS_GRIO(bp)) {
                 */
-                pagebuf_iorequest(bp);
+                xfs_buf_iorequest(bp);
                return 0;
        }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 6c40a74be7c8..8955720a2c6b 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -34,7 +34,7 @@ xfs_read_xfsstats(
        __uint64_t      xs_write_bytes = 0;
        __uint64_t      xs_read_bytes = 0;
-        static struct xstats_entry {
+        static const struct xstats_entry {
                char    *desc;
                int     endpoint;
        } xstats[] = {
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 50027c4a5618..8ba7a2fa6c1d 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -109,15 +109,15 @@ struct xfsstats {
        __uint32_t              vn_remove;      /* # times vn_remove called */
        __uint32_t              vn_free;        /* # times vn_free called */
 #define XFSSTAT_END_BUF                 (XFSSTAT_END_VNODE_OPS+9)
-        __uint32_t              pb_get;
+        __uint32_t              xb_get;
-        __uint32_t              pb_create;
+        __uint32_t              xb_create;
-        __uint32_t              pb_get_locked;
+        __uint32_t              xb_get_locked;
-        __uint32_t              pb_get_locked_waited;
+        __uint32_t              xb_get_locked_waited;
-        __uint32_t              pb_busy_locked;
+        __uint32_t              xb_busy_locked;
-        __uint32_t              pb_miss_locked;
+        __uint32_t              xb_miss_locked;
-        __uint32_t              pb_page_retries;
+        __uint32_t              xb_page_retries;
-        __uint32_t              pb_page_found;
+        __uint32_t              xb_page_found;
-        __uint32_t              pb_get_read;
+        __uint32_t              xb_get_read;
 /* Extra precision counters */
        __uint64_t              xs_xstrat_bytes;
        __uint64_t              xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 6116b5bf433e..f22e426d9e42 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, not supported with external log device");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
-        if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+        if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
                                        QUEUE_ORDERED_NONE) {
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, not supported by the underlying device");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
        error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, trial barrier write failed");
                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                return;
        }
 }
@@ -327,7 +330,7 @@ void
 xfs_blkdev_issue_flush(
        xfs_buftarg_t           *buftarg)
 {
-        blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+        blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
                timeleft = schedule_timeout_interruptible(timeleft);
                /* swsusp */
                try_to_freeze();
-                if (kthread_should_stop())
+                if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
                        break;
                spin_lock(&vfsp->vfs_sync_lock);
@@ -966,9 +969,9 @@ init_xfs_fs( void )
        if (error < 0)
                goto undo_zones;
-        error = pagebuf_init();
+        error = xfs_buf_init();
        if (error < 0)
-                goto undo_pagebuf;
+                goto undo_buffers;
        vn_init();
        xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
        return 0;
 undo_register:
-        pagebuf_terminate();
+        xfs_buf_terminate();
-undo_pagebuf:
+undo_buffers:
        linvfs_destroy_zones();
 undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
        XFS_DM_EXIT(&xfs_fs_type);
        unregister_filesystem(&xfs_fs_type);
        xfs_cleanup();
-        pagebuf_terminate();
+        xfs_buf_terminate();
        linvfs_destroy_zones();
        ktrace_uninit();
 }
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index e9bbcb4d6243..260dd8415dd7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -106,7 +106,6 @@ vn_revalidate_core(
        inode->i_blocks     = vap->va_nblocks;
        inode->i_mtime      = vap->va_mtime;
        inode->i_ctime      = vap->va_ctime;
-        inode->i_atime      = vap->va_atime;
        inode->i_blksize    = vap->va_blocksize;
        if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f2bbb327c081..0fe2419461d6 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp)
 }
 /*
+ * Extracting atime values in various formats
+ *
+ * Each helper reads i_atime from the Linux inode embedded in the vnode
+ * (vp->v_inode) and writes it to the caller-supplied destination:
+ *   vn_atime_to_bstime   - tv_sec and tv_nsec into an xfs_bstime_t
+ *   vn_atime_to_timespec - the whole struct timespec, by assignment
+ *   vn_atime_to_time_t   - tv_sec only
+ * None of them checks vp or the destination pointer for NULL.
+ */
+static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
+{
+        bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
+        bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
+}
+static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
+{
+        *ts = vp->v_inode.i_atime;
+}
+static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
+{
+        *tt = vp->v_inode.i_atime.tv_sec;
+}
+/*
 * Some useful predicates.
 */
 #define VN_MAPPED(vp)   mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 00b5043dfa5a..772ac48329ea 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -104,7 +104,7 @@ xfs_qm_dqinit(
         */
        if (brandnewdquot) {
                dqp->dq_flnext = dqp->dq_flprev = dqp;
-                mutex_init(&dqp->q_qlock,  MUTEX_DEFAULT, "xdq");
+                mutex_init(&dqp->q_qlock);
                initnsema(&dqp->q_flock, 1, "fdq");
                sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
@@ -1382,7 +1382,7 @@ void
 xfs_dqlock(
        xfs_dquot_t *dqp)
 {
-        mutex_lock(&(dqp->q_qlock), PINOD);
+        mutex_lock(&(dqp->q_qlock));
 }
 void
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2f69822344e5..2ec6b441849c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
         * trying to duplicate our effort.
         */
        ASSERT(qip->qli_pushbuf_flag != 0);
-        ASSERT(qip->qli_push_owner == get_thread_id());
+        ASSERT(qip->qli_push_owner == current_pid());
        /*
         * If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
                        qip->qli_pushbuf_flag = 1;
                        ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
 #ifdef DEBUG
-                        qip->qli_push_owner = get_thread_id();
+                        qip->qli_push_owner = current_pid();
 #endif
                        /*
                         * The dquot is left locked.
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5328a2937127..53a00fb217fa 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -167,7 +167,7 @@ xfs_Gqm_init(void)
        xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
        xqm->qm_nrefs = 0;
 #ifdef DEBUG
-        mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
+        mutex_init(&qcheck_lock);
 #endif
        return xqm;
 }
@@ -497,7 +497,7 @@ xfs_qm_dqflush_all(
        int             error;
        if (mp->m_quotainfo == NULL)
-                return (0);
+                return 0;
        niters = 0;
 again:
        xfs_qm_mplist_lock(mp);
@@ -528,7 +528,7 @@ again:
                error = xfs_qm_dqflush(dqp, flags);
                xfs_dqunlock(dqp);
                if (error)
-                        return (error);
+                        return error;
                xfs_qm_mplist_lock(mp);
                if (recl != XFS_QI_MPLRECLAIMS(mp)) {
@@ -540,7 +540,7 @@ again:
        xfs_qm_mplist_unlock(mp);
        /* return ! busy */
-        return (0);
+        return 0;
 }
 /*
 * Release the group dquot pointers the user dquots may be
@@ -599,7 +599,7 @@ xfs_qm_dqpurge_int(
        int             nmisses;
        if (mp->m_quotainfo == NULL)
-                return (0);
+                return 0;
        dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
        dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
@@ -796,7 +796,7 @@ xfs_qm_dqattach_one(
                        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        }
 #endif
-        return (error);
+        return error;
 }
@@ -897,7 +897,7 @@ xfs_qm_dqattach(
            (! XFS_NOT_DQATTACHED(mp, ip)) ||
            (ip->i_ino == mp->m_sb.sb_uquotino) ||
            (ip->i_ino == mp->m_sb.sb_gquotino))
-                return (0);
+                return 0;
        ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
               XFS_ISLOCKED_INODE_EXCL(ip));
@@ -984,7 +984,7 @@ xfs_qm_dqattach(
        else
                ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
 #endif
-        return (error);
+        return error;
 }
 /*
@@ -1049,7 +1049,7 @@ xfs_qm_sync(
         */
        if (! XFS_IS_QUOTA_ON(mp)) {
                xfs_qm_mplist_unlock(mp);
-                return (0);
+                return 0;
        }
        FOREACH_DQUOT_IN_MP(dqp, mp) {
                /*
@@ -1109,9 +1109,9 @@ xfs_qm_sync(
                error = xfs_qm_dqflush(dqp, flush_flags);
                xfs_dqunlock(dqp);
                if (error && XFS_FORCED_SHUTDOWN(mp))
-                        return(0);      /* Need to prevent umount failure */
+                        return 0;       /* Need to prevent umount failure */
                else if (error)
-                        return (error);
+                        return error;
                xfs_qm_mplist_lock(mp);
                if (recl != XFS_QI_MPLRECLAIMS(mp)) {
@@ -1124,7 +1124,7 @@ xfs_qm_sync(
        }
        xfs_qm_mplist_unlock(mp);
-        return (0);
+        return 0;
 }
@@ -1146,7 +1146,7 @@ xfs_qm_init_quotainfo(
         * Tell XQM that we exist as soon as possible.
         */
        if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-                return (error);
+                return error;
        }
        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
@@ -1158,7 +1158,7 @@ xfs_qm_init_quotainfo(
        if ((error = xfs_qm_init_quotainos(mp))) {
                kmem_free(qinf, sizeof(xfs_quotainfo_t));
                mp->m_quotainfo = NULL;
-                return (error);
+                return error;
        }
        spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
@@ -1166,7 +1166,7 @@ xfs_qm_init_quotainfo(
        qinf->qi_dqreclaims = 0;
        /* mutex used to serialize quotaoffs */
-        mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
+        mutex_init(&qinf->qi_quotaofflock);
        /* Precalc some constants */
        qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -1232,7 +1232,7 @@ xfs_qm_init_quotainfo(
                qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
        }
-        return (0);
+        return 0;
 }
@@ -1285,7 +1285,7 @@ xfs_qm_list_init(
        char            *str,
        int             n)
 {
-        mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
+        mutex_init(&list->qh_lock);
        list->qh_next = NULL;
        list->qh_version = 0;
        list->qh_nelems = 0;
@@ -1332,7 +1332,7 @@ xfs_qm_dqget_noattach(
                         */
                        ASSERT(error != ESRCH);
                        ASSERT(error != ENOENT);
-                        return (error);
+                        return error;
                }
                ASSERT(udqp);
        }
@@ -1355,7 +1355,7 @@ xfs_qm_dqget_noattach(
                                xfs_qm_dqrele(udqp);
                        ASSERT(error != ESRCH);
                        ASSERT(error != ENOENT);
-                        return (error);
+                        return error;
                }
                ASSERT(gdqp);
@@ -1376,7 +1376,7 @@ xfs_qm_dqget_noattach(
        if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
        if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
 #endif
-        return (0);
+        return 0;
 }
 /*
@@ -1392,26 +1392,28 @@ xfs_qm_qino_alloc(
 {
        xfs_trans_t     *tp;
        int             error;
-        unsigned long s;
+        unsigned long   s;
        cred_t          zerocr;
+        xfs_inode_t     zeroino;
        int             committed;
-        tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
        if ((error = xfs_trans_reserve(tp,
                                      XFS_QM_QINOCREATE_SPACE_RES(mp),
                                      XFS_CREATE_LOG_RES(mp), 0,
                                      XFS_TRANS_PERM_LOG_RES,
                                      XFS_CREATE_LOG_COUNT))) {
                xfs_trans_cancel(tp, 0);
-                return (error);
+                return error;
        }
        memset(&zerocr, 0, sizeof(zerocr));
+        memset(&zeroino, 0, sizeof(zeroino));
-        if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
+        if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
                                   &zerocr, 0, 1, ip, &committed))) {
                xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT);
-                return (error);
+                return error;
        }
        /*
@@ -1459,9 +1461,9 @@ xfs_qm_qino_alloc(
        if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
                                     NULL))) {
                xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
-                return (error);
+                return error;
        }
-        return (0);
+        return 0;
 }
@@ -1506,7 +1508,7 @@ xfs_qm_reset_dqcounts(
                ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
        }
-        return (0);
+        return 0;
 }
 STATIC int
@@ -1555,7 +1557,7 @@ xfs_qm_dqiter_bufs(
                bno++;
                firstid += XFS_QM_DQPERBLK(mp);
        }
-        return (error);
+        return error;
 }
 /*
@@ -1584,7 +1586,7 @@ xfs_qm_dqiterate(
         * happens only at mount time which is single threaded.
         */
        if (qip->i_d.di_nblocks == 0)
-                return (0);
+                return 0;
        map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
@@ -1653,7 +1655,7 @@ xfs_qm_dqiterate(
        kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
-        return (error);
+        return error;
 }
 /*
@@ -1713,7 +1715,7 @@ xfs_qm_get_rtblks(
        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
                if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-                        return (error);
+                        return error;
        }
        rtblks = 0;
        nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
@@ -1721,7 +1723,7 @@ xfs_qm_get_rtblks(
        for (ep = base; ep < &base[nextents]; ep++)
                rtblks += xfs_bmbt_get_blockcount(ep);
        *O_rtblks = (xfs_qcnt_t)rtblks;
-        return (0);
+        return 0;
 }
 /*
@@ -1765,7 +1767,7 @@ xfs_qm_dqusage_adjust(
         */
        if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
                *res = BULKSTAT_RV_NOTHING;
-                return (error);
+                return error;
        }
        if (ip->i_d.di_mode == 0) {
@@ -1783,7 +1785,7 @@ xfs_qm_dqusage_adjust(
        if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
                xfs_iput(ip, XFS_ILOCK_EXCL);
                *res = BULKSTAT_RV_GIVEUP;
-                return (error);
+                return error;
        }
        rtblks = 0;
@@ -1800,7 +1802,7 @@ xfs_qm_dqusage_adjust(
                        if (gdqp)
                                xfs_qm_dqput(gdqp);
                        *res = BULKSTAT_RV_GIVEUP;
-                        return (error);
+                        return error;
                }
                nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
        }
@@ -1845,7 +1847,7 @@ xfs_qm_dqusage_adjust(
         * Goto next inode.
         */
        *res = BULKSTAT_RV_DIDONE;
-        return (0);
+        return 0;
 }
 /*
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
         * at this point (because we intentionally didn't in dqget_noattach).
         */
        if (error) {
-                xfs_qm_dqpurge_all(mp,
+                xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
-                                   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-                                   XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
                goto error_return;
        }
        /*
@@ -2041,7 +2041,7 @@ xfs_qm_init_quotainos(
        XFS_QI_UQIP(mp) = uip;
        XFS_QI_GQIP(mp) = gip;
-        return (0);
+        return 0;
 }
@@ -2062,7 +2062,7 @@ xfs_qm_shake_freelist(
        int             nflushes;
        if (howmany <= 0)
-                return (0);
+                return 0;
        nreclaimed = 0;
        restarts = 0;
@@ -2088,7 +2088,7 @@ xfs_qm_shake_freelist(
                        xfs_dqunlock(dqp);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                                return (nreclaimed);
+                                return nreclaimed;
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
                        goto tryagain;
                }
@@ -2163,7 +2163,7 @@ xfs_qm_shake_freelist(
                        XFS_DQ_HASH_UNLOCK(hash);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                                return (nreclaimed);
+                                return nreclaimed;
                        goto tryagain;
                }
                xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
@@ -2188,7 +2188,7 @@ xfs_qm_shake_freelist(
                dqp = nextdqp;
        }
        xfs_qm_freelist_unlock(xfs_Gqm);
-        return (nreclaimed);
+        return nreclaimed;
 }
@@ -2202,9 +2202,9 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
        int     ndqused, nfree, n;
        if (!kmem_shake_allow(gfp_mask))
-                return (0);
+                return 0;
        if (!xfs_Gqm)
-                return (0);
+                return 0;
        nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
        /* incore dquots in all f/s's */
@@ -2213,7 +2213,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
        ASSERT(ndqused >= 0);
        if (nfree <= ndqused && nfree < ndquot)
-                return (0);
+                return 0;
        ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
        n = nfree - ndqused - ndquot;           /* # over target */
@@ -2257,7 +2257,7 @@ xfs_qm_dqreclaim_one(void)
                        xfs_dqunlock(dqp);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                                return (NULL);
+                                return NULL;
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
                        goto startagain;
                }
@@ -2333,7 +2333,7 @@ xfs_qm_dqreclaim_one(void)
        }
        xfs_qm_freelist_unlock(xfs_Gqm);
-        return (dqpout);
+        return dqpout;
 }
@@ -2369,7 +2369,7 @@ xfs_qm_dqalloc_incore(
                         */
                        memset(&dqp->q_core, 0, sizeof(dqp->q_core));
                        *O_dqpp = dqp;
-                        return (B_FALSE);
+                        return B_FALSE;
                }
                XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
        }
@@ -2382,7 +2382,7 @@ xfs_qm_dqalloc_incore(
        *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
        atomic_inc(&xfs_Gqm->qm_totaldquots);
-        return (B_TRUE);
+        return B_TRUE;
 }
@@ -2407,13 +2407,13 @@ xfs_qm_write_sb_changes(
                                      0,
                                      XFS_DEFAULT_LOG_COUNT))) {
                xfs_trans_cancel(tp, 0);
-                return (error);
+                return error;
        }
        xfs_mod_sb(tp, flags);
        (void) xfs_trans_commit(tp, 0, NULL);
-        return (0);
+        return 0;
 }
@@ -2463,7 +2463,7 @@ xfs_qm_vop_dqalloc(
                if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
                                            XFS_QMOPT_ILOCKED))) {
                        xfs_iunlock(ip, lockflags);
-                        return (error);
+                        return error;
                }
        }
@@ -2486,7 +2486,7 @@ xfs_qm_vop_dqalloc(
                                                 XFS_QMOPT_DOWARN,
                                                 &uq))) {
                                ASSERT(error != ENOENT);
-                                return (error);
+                                return error;
                        }
                        /*
                         * Get the ilock in the right order.
@@ -2517,7 +2517,7 @@ xfs_qm_vop_dqalloc(
                                if (uq)
                                        xfs_qm_dqrele(uq);
                                ASSERT(error != ENOENT);
-                                return (error);
+                                return error;
                        }
                        xfs_dqunlock(gq);
                        lockflags = XFS_ILOCK_SHARED;
@@ -2565,7 +2565,7 @@ xfs_qm_vop_dqalloc(
                *O_gdqpp = gq;
        else if (gq)
                xfs_qm_dqrele(gq);
-        return (0);
+        return 0;
 }
 /*
@@ -2608,7 +2608,7 @@ xfs_qm_vop_chown(
        xfs_dqunlock(newdq);
        *IO_olddq = newdq;
-        return (prevdq);
+        return prevdq;
 }
 /*
@@ -2702,12 +2702,12 @@ xfs_qm_vop_rename_dqattach(
        ip = i_tab[0];
        if (! XFS_IS_QUOTA_ON(ip->i_mount))
-                return (0);
+                return 0;
        if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
                error = xfs_qm_dqattach(ip, 0);
                if (error)
-                        return (error);
+                        return error;
        }
        for (i = 1; (i < 4 && i_tab[i]); i++) {
                /*
@@ -2717,11 +2717,11 @@ xfs_qm_vop_rename_dqattach(
                        if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
                                error = xfs_qm_dqattach(ip, 0);
                                if (error)
-                                        return (error);
+                                        return error;
                        }
                }
        }
-        return (0);
+        return 0;
 }
 void
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
                xfs_dqunlock(udqp);
                ASSERT(ip->i_udquot == NULL);
                ip->i_udquot = udqp;
+                ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
                ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
                xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
                xfs_dqunlock(gdqp);
                ASSERT(ip->i_gdquot == NULL);
                ip->i_gdquot = gdqp;
-                ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
+                ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
+                ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+                        ip->i_d.di_gid : ip->i_d.di_projid) ==
+                                be32_to_cpu(gdqp->q_core.d_id));
                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
 }
@@ -2762,7 +2766,7 @@ STATIC void
 xfs_qm_freelist_init(xfs_frlist_t *ql)
 {
        ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
-        mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
+        mutex_init(&ql->qh_lock);
        ql->qh_version = 0;
        ql->qh_nelems = 0;
 }
@@ -2772,7 +2776,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 {
        xfs_dquot_t     *dqp, *nextdqp;
-        mutex_lock(&ql->qh_lock, PINOD);
+        mutex_lock(&ql->qh_lock);
        for (dqp = ql->qh_next;
             dqp != (xfs_dquot_t *)ql; ) {
                xfs_dqlock(dqp);
@@ -2830,7 +2834,7 @@ xfs_qm_dqhashlock_nowait(
        int locked;
        locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
-        return (locked);
+        return locked;
 }
 int
@@ -2840,7 +2844,7 @@ xfs_qm_freelist_lock_nowait(
        int locked;
        locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
-        return (locked);
+        return locked;
 }
 STATIC int
@@ -2851,5 +2855,5 @@ xfs_qm_mplist_nowait(
        ASSERT(mp->m_quotainfo);
        locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
-        return (locked);
+        return locked;
 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 12da259f2fcb..4568deb6da86 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_IWARNLIMIT       5
 #define XFS_QM_RTBWARNLIMIT     5
-#define XFS_QM_LOCK(xqm)        (mutex_lock(&xqm##_lock, PINOD))
+#define XFS_QM_LOCK(xqm)        (mutex_lock(&xqm##_lock))
 #define XFS_QM_UNLOCK(xqm)      (mutex_unlock(&xqm##_lock))
 #define XFS_QM_HOLD(xqm)        ((xqm)->qm_nrefs++)
 #define XFS_QM_RELE(xqm)        ((xqm)->qm_nrefs--)
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index d9d2993de435..90402a1c3983 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -363,7 +363,7 @@ xfs_qm_init(void)
                KERN_INFO "SGI XFS Quota Management subsystem\n";
        printk(message);
-        mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock");
+        mutex_init(&xfs_Gqm_lock);
        vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
        xfs_qm_init_procfs();
 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 24690e1af659..676884394aae 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -15,6 +15,9 @@
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
+#include <linux/capability.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_bit.h"
@@ -233,7 +236,7 @@ xfs_qm_scall_quotaoff(
         */
        ASSERT(mp->m_quotainfo);
        if (mp->m_quotainfo)
-                mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+                mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
        ASSERT(mp->m_quotainfo);
@@ -508,7 +511,7 @@ xfs_qm_scall_quotaon(
        /*
         * Switch on quota enforcement in core.
         */
-        mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+        mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
        mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
        mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
@@ -617,7 +620,7 @@ xfs_qm_scall_setqlim(
         * a quotaoff from happening). (XXXThis doesn't currently happen
         * because we take the vfslock before calling xfs_qm_sysent).
         */
-        mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+        mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
        /*
         * Get the dquot (locked), and join it to the transaction.
@@ -1426,7 +1429,7 @@ xfs_qm_internalqcheck(
        xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
        XFS_bflush(mp->m_ddev_targp);
-        mutex_lock(&qcheck_lock, PINOD);
+        mutex_lock(&qcheck_lock);
        /* There should be absolutely no quota activity while this
           is going on. */
        qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 7a9f3beb818c..b7ddd04aae32 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -51,7 +51,7 @@
 #define XFS_QI_MPLNEXT(mp)      ((mp)->m_quotainfo->qi_dqlist.qh_next)
 #define XFS_QI_MPLNDQUOTS(mp)   ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
-#define XQMLCK(h)                       (mutex_lock(&((h)->qh_lock), PINOD))
+#define XQMLCK(h)                       (mutex_lock(&((h)->qh_lock)))
 #define XQMUNLCK(h)                     (mutex_unlock(&((h)->qh_lock)))
 #ifdef DEBUG
 struct xfs_dqhash;
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index bb6dc91ea261..b08b3d9345b7 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -27,45 +27,12 @@ static DEFINE_SPINLOCK(xfs_err_lock);
 /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
 #define XFS_MAX_ERR_LEVEL       7
 #define XFS_ERR_MASK            ((1 << 3) - 1)
-static char             *err_level[XFS_MAX_ERR_LEVEL+1] =
+static const char * const       err_level[XFS_MAX_ERR_LEVEL+1] =
                                        {KERN_EMERG, KERN_ALERT, KERN_CRIT,
                                         KERN_ERR, KERN_WARNING, KERN_NOTICE,
                                         KERN_INFO, KERN_DEBUG};
 void
-assfail(char *a, char *f, int l)
-{
-    printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
-    BUG();
-}
-#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
-unsigned long
-random(void)
-{
-        static unsigned long    RandomValue = 1;
-        /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
-        register long   rv = RandomValue;
-        register long   lo;
-        register long   hi;
-        hi = rv / 127773;
-        lo = rv % 127773;
-        rv = 16807 * lo - 2836 * hi;
-        if( rv <= 0 ) rv += 2147483647;
-        return( RandomValue = rv );
-}
-int
-get_thread_id(void)
-{
-        return current->pid;
-}
-#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
-void
 cmn_err(register int level, char *fmt, ...)
 {
        char    *fp = fmt;
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
                BUG();
 }
 void
 icmn_err(register int level, char *fmt, va_list ap)
 {
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
        if (level == CE_PANIC)
                BUG();
 }
+void
+assfail(char *expr, char *file, int line)
+{
+        printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
+        BUG();
+}
+#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
+unsigned long random(void)
+{
+        static unsigned long    RandomValue = 1;
+        /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
+        register long   rv = RandomValue;
+        register long   lo;
+        register long   hi;
+        hi = rv / 127773;
+        lo = rv % 127773;
+        rv = 16807 * lo - 2836 * hi;
+        if (rv <= 0) rv += 2147483647;
+        return RandomValue = rv;
+}
+#endif /* (DEBUG || INDUCE_IO_ERRROR) && !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index aff558664c32..e3bf58112e7e 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
        __attribute__ ((format (printf, 2, 0)));
 extern void cmn_err(int, char *, ...)
        __attribute__ ((format (printf, 2, 3)));
+extern void assfail(char *expr, char *f, int l);
-#ifndef STATIC
+#define prdev(fmt,targ,args...) \
-# define STATIC static
+        printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-#endif
-#ifdef DEBUG
+#define ASSERT_ALWAYS(expr)     \
-# define ASSERT(EX)     ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
+        (likely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-#else
-# define ASSERT(x)      ((void)0)
-#endif
-extern void assfail(char *, char *, int);
+#ifndef DEBUG
-#ifdef DEBUG
+# define ASSERT(expr)   ((void)0)
+#else
+# define ASSERT(expr)   ASSERT_ALWAYS(expr)
 extern unsigned long random(void);
-extern int get_thread_id(void);
 #endif
-#define ASSERT_ALWAYS(EX)  ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__))
+#ifndef STATIC
-#define debug_stop_all_cpus(param)      /* param is "cpumask_t *" */
+# define STATIC static
+#endif
 #endif  /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 70ce40914c8a..a3d565a67734 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -24,9 +24,19 @@ static uuid_t	*uuid_table;
 void
 uuid_init(void)
 {
-        mutex_init(&uuid_monitor, MUTEX_DEFAULT, "uuid_monitor");
+        mutex_init(&uuid_monitor);
 }
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+        __be32  uu_timelow;
+        __be16  uu_timemid;
+        __be16  uu_timehi;
+        __be16  uu_clockseq;
+        __be16  uu_node[3];
+} xfs_uu_t;
 /*
 * uuid_getnodeuniq - obtain the node unique fields of a UUID.
 *
@@ -36,16 +46,11 @@ uuid_init(void)
 void
 uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 {
-        char    *uu = (char *)uuid;
+        xfs_uu_t *uup = (xfs_uu_t *)uuid;
-        /* on IRIX, this function assumes big-endian fields within
-         * the uuid, so we use INT_GET to get the same result on
-         * little-endian systems
-         */
-        fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +
+        fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                   INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);
+                   be16_to_cpu(uup->uu_timemid);
-        fsid[1] =  INT_GET(*(u_int32_t*)(uu  ), ARCH_CONVERT);
+        fsid[1] = be32_to_cpu(uup->uu_timelow);
 }
 void
@@ -94,7 +99,7 @@ uuid_table_insert(uuid_t *uuid)
 {
        int     i, hole;
-        mutex_lock(&uuid_monitor, PVFS);
+        mutex_lock(&uuid_monitor);
        for (i = 0, hole = -1; i < uuid_table_size; i++) {
                if (uuid_is_nil(&uuid_table[i])) {
                        hole = i;
@@ -122,7 +127,7 @@ uuid_table_remove(uuid_t *uuid)
 {
        int     i;
-        mutex_lock(&uuid_monitor, PVFS);
+        mutex_lock(&uuid_monitor);
        for (i = 0; i < uuid_table_size; i++) {
                if (uuid_is_nil(&uuid_table[i]))
                        continue;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index cc9c91b9e771..4ff0f4e41c61 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -36,6 +36,7 @@
 #include "xfs_mac.h"
 #include "xfs_attr.h"
+#include <linux/capability.h>
 #include <linux/posix_acl_xattr.h>
 STATIC int      xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *);
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 68e5051d8e24..c4836890b726 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,6 +40,22 @@
 #undef XFS_NATIVE_HOST
 #endif
+#ifdef XFS_NATIVE_HOST
+#define cpu_to_be16(val)        ((__be16)(val))
+#define cpu_to_be32(val)        ((__be32)(val))
+#define cpu_to_be64(val)        ((__be64)(val))
+#define be16_to_cpu(val)        ((__uint16_t)(val))
+#define be32_to_cpu(val)        ((__uint32_t)(val))
+#define be64_to_cpu(val)        ((__uint64_t)(val))
+#else
+#define cpu_to_be16(val)        (__swab16((__uint16_t)(val)))
+#define cpu_to_be32(val)        (__swab32((__uint32_t)(val)))
+#define cpu_to_be64(val)        (__swab64((__uint64_t)(val)))
+#define be16_to_cpu(val)        (__swab16((__be16)(val)))
+#define be32_to_cpu(val)        (__swab32((__be32)(val)))
+#define be64_to_cpu(val)        (__swab64((__be64)(val)))
+#endif
 #endif  /* __KERNEL__ */
 /* do we need conversion? */
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
 */ 
 #define XFS_GET_DIR_INO4(di) \
-        (((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+        (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 #define XFS_PUT_DIR_INO4(from, di) \
 do { \
@@ -197,9 +213,9 @@ do { \
 } while (0)
 #define XFS_DI_HI(di) \
-        (((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+        (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 #define XFS_DI_LO(di) \
-        (((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
+        (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
 #define XFS_GET_DIR_INO8(di)        \
        (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5484eeb460c8..e5e91e9c7e89 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -15,6 +15,9 @@
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
+#include <linux/capability.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -117,11 +120,6 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
             ip->i_d.di_anextents == 0))
                return(ENOATTR);
-        if (!(flags & (ATTR_KERNACCESS|ATTR_SECURE))) {
-                if ((error = xfs_iaccess(ip, S_IRUSR, cred)))
-                        return(XFS_ERROR(error));
-        }
        /*
         * Fill in the arg structure for this request.
         */
@@ -425,7 +423,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
             struct cred *cred)
 {
        xfs_inode_t     *dp;
-        int             namelen, error;
+        int             namelen;
        namelen = strlen(name);
        if (namelen >= MAXNAMELEN)
@@ -437,14 +435,6 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return (EIO);
-        xfs_ilock(dp, XFS_ILOCK_SHARED);
-        if (!(flags & ATTR_SECURE) &&
-             (error = xfs_iaccess(dp, S_IWUSR, cred))) {
-                xfs_iunlock(dp, XFS_ILOCK_SHARED);
-                return(XFS_ERROR(error));
-        }
-        xfs_iunlock(dp, XFS_ILOCK_SHARED);
        return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
 }
@@ -579,7 +569,7 @@ int
 xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
 {
        xfs_inode_t         *dp;
-        int                 namelen, error;
+        int                 namelen;
        namelen = strlen(name);
        if (namelen >= MAXNAMELEN)
@@ -592,11 +582,7 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
                return (EIO);
        xfs_ilock(dp, XFS_ILOCK_SHARED);
-        if (!(flags & ATTR_SECURE) &&
+        if (XFS_IFORK_Q(dp) == 0 ||
-             (error = xfs_iaccess(dp, S_IWUSR, cred))) {
-                xfs_iunlock(dp, XFS_ILOCK_SHARED);
-                return(XFS_ERROR(error));
-        } else if (XFS_IFORK_Q(dp) == 0 ||
                   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
                    dp->i_d.di_anextents == 0)) {
                xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -668,12 +654,6 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
                return (EIO);
        xfs_ilock(dp, XFS_ILOCK_SHARED);
-        if (!(flags & ATTR_SECURE) &&
-             (error = xfs_iaccess(dp, S_IRUSR, cred))) {
-                xfs_iunlock(dp, XFS_ILOCK_SHARED);
-                return(XFS_ERROR(error));
-        }
        /*
         * Decide on what work routines to call based on the inode size.
         */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1c7421840c18..fe91eac4e2a7 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
                return (offset >= minforkoff) ? minforkoff : 0;
        }
-        if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+        if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
                if (bytes <= XFS_IFORK_ASIZE(dp))
                        return mp->m_attroffset >> 3;
                return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
        unsigned long s;
-        if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+        if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
            !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
                s = XFS_SB_LOCK(mp);
                if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
         */
        totsize -= size;
        if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
-            !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+            (mp->m_flags & XFS_MOUNT_ATTR2)) {
                /*
                 * Last attribute now removed, revert to original
                 * inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
                dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
                ASSERT(dp->i_d.di_forkoff);
                ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
-                        (mp->m_flags & XFS_MOUNT_COMPAT_ATTR));
+                        !(mp->m_flags & XFS_MOUNT_ATTR2));
                dp->i_afp->if_ext_max =
                        XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
                dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
                                + name_loc->namelen
                                + INT_GET(name_loc->valuelen, ARCH_CONVERT);
        }
-        if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+        if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
            (bytes == sizeof(struct xfs_attr_sf_hdr)))
                return(-1);
        return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
                goto out;
        if (forkoff == -1) {
-                ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));
+                ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
                /*
                 * Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f6143ff251a0..541e34109bb9 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -63,7 +63,7 @@ struct xfs_trans;
 * the leaf_entry.  The namespaces are independent only because we also look
 * at the namespace bit when we are looking for a matching attribute name.
 *
- * We also store a "incomplete" bit in the leaf_entry.  It shows that an
+ * We also store an "incomplete" bit in the leaf_entry.  It shows that an
 * attribute is in the middle of being created and should not be shown to
 * the user if we crash during the time that the bit is set.  We clear the
 * bit when we have finished setting up the attribute.  We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
 */
 #define XFS_ATTR_LEAF_MAPSIZE   3       /* how many freespace slots */
+typedef struct xfs_attr_leaf_map {      /* RLE map of free bytes */
+        __uint16_t      base;           /* base of free region */
+        __uint16_t      size;           /* length of free region */
+} xfs_attr_leaf_map_t;
+typedef struct xfs_attr_leaf_hdr {      /* constant-structure header block */
+        xfs_da_blkinfo_t info;          /* block type, links, etc. */
+        __uint16_t      count;          /* count of active leaf entries */
+        __uint16_t      usedbytes;      /* num bytes of names/values stored */
+        __uint16_t      firstused;      /* first used byte in name area */
+        __uint8_t       holes;          /* != 0 if blk needs compaction */
+        __uint8_t       pad1;           /* padding byte; presumably unused - TODO confirm */
+        xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
+                                        /* N largest free regions */
+} xfs_attr_leaf_hdr_t;
+typedef struct xfs_attr_leaf_entry {    /* sorted on key, not name */
+        xfs_dahash_t    hashval;        /* hash value of name */
+        __uint16_t      nameidx;        /* index into buffer of name/value */
+        __uint8_t       flags;          /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
+        __uint8_t       pad2;           /* unused pad byte */
+} xfs_attr_leaf_entry_t;
+typedef struct xfs_attr_leaf_name_local {
+        __uint16_t      valuelen;       /* number of bytes in value */
+        __uint8_t       namelen;        /* length of name bytes */
+        __uint8_t       nameval[1];     /* name/value bytes */
+} xfs_attr_leaf_name_local_t;
+typedef struct xfs_attr_leaf_name_remote {
+        xfs_dablk_t     valueblk;       /* block number of value bytes */
+        __uint32_t      valuelen;       /* number of bytes in value */
+        __uint8_t       namelen;        /* length of name bytes */
+        __uint8_t       name[1];        /* name bytes */
+} xfs_attr_leaf_name_remote_t;
 typedef struct xfs_attr_leafblock {
-        struct xfs_attr_leaf_hdr {      /* constant-structure header block */
+        xfs_attr_leaf_hdr_t     hdr;    /* constant-structure header block */
-                xfs_da_blkinfo_t info;  /* block type, links, etc. */
+        xfs_attr_leaf_entry_t   entries[1];     /* sorted on key, not name */
-                __uint16_t count;       /* count of active leaf_entry's */
+        xfs_attr_leaf_name_local_t namelist;    /* grows from bottom of buf */
-                __uint16_t usedbytes;   /* num bytes of names/values stored */
+        xfs_attr_leaf_name_remote_t valuelist;  /* grows from bottom of buf */
-                __uint16_t firstused;   /* first used byte in name area */
-                __uint8_t  holes;       /* != 0 if blk needs compaction */
-                __uint8_t  pad1;
-                struct xfs_attr_leaf_map {        /* RLE map of free bytes */
-                        __uint16_t base;          /* base of free region */
-                        __uint16_t size;          /* length of free region */
-                } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
-        } hdr;
-        struct xfs_attr_leaf_entry {    /* sorted on key, not name */
-                xfs_dahash_t hashval;   /* hash value of name */
-                __uint16_t nameidx;     /* index into buffer of name/value */
-                __uint8_t flags;        /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
-                __uint8_t pad2;         /* unused pad byte */
-        } entries[1];                   /* variable sized array */
-        struct xfs_attr_leaf_name_local {
-                __uint16_t valuelen;    /* number of bytes in value */
-                __uint8_t namelen;      /* length of name bytes */
-                __uint8_t nameval[1];   /* name/value bytes */
-        } namelist;                     /* grows from bottom of buf */
-        struct xfs_attr_leaf_name_remote {
-                xfs_dablk_t valueblk;   /* block number of value bytes */
-                __uint32_t valuelen;    /* number of bytes in value */
-                __uint8_t namelen;      /* length of name bytes */
-                __uint8_t name[1];      /* name bytes */
-        } valuelist;                    /* grows from bottom of buf */
 } xfs_attr_leafblock_t;
-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
 /*
 * Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
                (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
 }
-#define XFS_ATTR_LEAF_NAME(leafp,idx)           xfs_attr_leaf_name(leafp,idx)
+#define XFS_ATTR_LEAF_NAME(leafp,idx)           \
+        xfs_attr_leaf_name(leafp,idx)
 static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
 {
        return (&((char *)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9c..70625e577c70 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
        return 0; /* keep gcc quite */
 }
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ *
+ * On entry *offp / *lenp describe the requested file range.  On a
+ * successful return that actually performed alignment they are
+ * overwritten with the aligned offset and length.  They are left
+ * untouched when:
+ *   - convert is set (no alignment is attempted at all), or
+ *   - the request is neither delalloc nor at EOF and already lies
+ *     entirely inside the extent described by gotp, or
+ *   - the function fails.
+ *
+ * extsz is used as a divisor below, so it must be non-zero.
+ *
+ * Returns 0 on success.  For realtime requests (rt != 0) it returns
+ * XFS_ERROR(EINVAL) when the length cannot be made a multiple of
+ * mp->m_sb.sb_rextsize while still covering the original request.
+ */
+STATIC int
+xfs_bmap_extsize_align(
+        xfs_mount_t     *mp,
+        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
+        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
+        xfs_extlen_t    extsz,          /* align to this extent size */
+        int             rt,             /* is this a realtime inode? */
+        int             eof,            /* is extent at end-of-file? */
+        int             delay,          /* creating delalloc extent? */
+        int             convert,        /* overwriting unwritten extent? */
+        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
+        xfs_extlen_t    *lenp)          /* in/out: aligned length */
+{
+        xfs_fileoff_t   orig_off;       /* original offset */
+        xfs_extlen_t    orig_alen;      /* original length */
+        xfs_fileoff_t   orig_end;       /* original off+len */
+        xfs_fileoff_t   nexto;          /* next file offset */
+        xfs_fileoff_t   prevo;          /* previous file offset */
+        xfs_fileoff_t   align_off;      /* temp for offset */
+        xfs_extlen_t    align_alen;     /* temp for length */
+        xfs_extlen_t    temp;           /* temp for calculations */
+        if (convert)
+                return 0;
+        orig_off = align_off = *offp;
+        orig_alen = align_alen = *lenp;
+        orig_end = orig_off + orig_alen;
+        /*
+         * If this request overlaps an existing extent, then don't
+         * attempt to perform any additional alignment.
+         * (Only checked for non-delalloc, non-EOF requests; "overlaps"
+         * here means the request is fully contained in gotp.)
+         */
+        if (!delay && !eof &&
+            (orig_off >= gotp->br_startoff) &&
+            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+                return 0;
+        }
+        /*
+         * If the file offset is unaligned vs. the extent size
+         * we need to align it.  This will be possible unless
+         * the file was previously written with a kernel that didn't
+         * perform this alignment, or if a truncate shot us in the
+         * foot.
+         * The start is rounded down to a multiple of extsz and the
+         * length grows by the same amount, so the end is unchanged.
+         */
+        temp = do_mod(orig_off, extsz);
+        if (temp) {
+                align_alen += temp;
+                align_off -= temp;
+        }
+        /*
+         * Same adjustment for the end of the requested area.
+         * The length is rounded up to the next multiple of extsz.
+         */
+        if ((temp = (align_alen % extsz))) {
+                align_alen += extsz - temp;
+        }
+        /*
+         * If the previous block overlaps with this proposed allocation
+         * then move the start forward without adjusting the length.
+         *
+         * prevo is the lowest offset the aligned start may take: the
+         * start of prevp when prevp is marked HOLESTARTBLOCK, the end
+         * of prevp otherwise, and 0 when there is no previous extent
+         * (br_startoff == NULLFILEOFF).
+         *
+         * NOTE(review): the DEBUG check at the end of this function
+         * always compares against the end of prevp, even in the
+         * HOLESTARTBLOCK case handled here - confirm that is intended.
+         */
+        if (prevp->br_startoff != NULLFILEOFF) {
+                if (prevp->br_startblock == HOLESTARTBLOCK)
+                        prevo = prevp->br_startoff;
+                else
+                        prevo = prevp->br_startoff + prevp->br_blockcount;
+        } else
+                prevo = 0;
+        if (align_off != orig_off && align_off < prevo)
+                align_off = prevo;
+        /*
+         * If the next block overlaps with this proposed allocation
+         * then move the start back without adjusting the length,
+         * but not before offset 0.
+         * This may of course make the start overlap previous block,
+         * and if we hit the offset 0 limit then the next block
+         * can still overlap too.
+         *
+         * nexto is the offset the aligned range must not run past:
+         * normally the start of gotp, but the end of gotp when a
+         * delalloc request meets a HOLESTARTBLOCK record or a
+         * non-delalloc request meets a DELAYSTARTBLOCK record.  It is
+         * NULLFILEOFF at EOF or when there is no next extent.
+         */
+        if (!eof && gotp->br_startoff != NULLFILEOFF) {
+                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+                        nexto = gotp->br_startoff + gotp->br_blockcount;
+                else
+                        nexto = gotp->br_startoff;
+        } else
+                nexto = NULLFILEOFF;
+        if (!eof &&
+            align_off + align_alen != orig_end &&
+            align_off + align_alen > nexto)
+                align_off = nexto > align_alen ? nexto - align_alen : 0;
+        /*
+         * If we're now overlapping the next or previous extent that
+         * means we can't fit an extsz piece in this hole.  Just move
+         * the start forward to the first valid spot and set
+         * the length so we hit the end.
+         * The length is only clamped when nexto is a real offset
+         * (not NULLFILEOFF).
+         */
+        if (align_off != orig_off && align_off < prevo)
+                align_off = prevo;
+        if (align_off + align_alen != orig_end &&
+            align_off + align_alen > nexto &&
+            nexto != NULLFILEOFF) {
+                ASSERT(nexto > prevo);
+                align_alen = nexto - align_off;
+        }
+        /*
+         * If realtime, and the result isn't a multiple of the realtime
+         * extent size we need to remove blocks until it is.
+         * temp holds the excess over a multiple of sb_rextsize.
+         */
+        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+                /*
+                 * We're not covering the original request, or
+                 * we won't be able to once we fix the length.
+                 */
+                if (orig_off < align_off ||
+                    orig_end > align_off + align_alen ||
+                    align_alen - temp < orig_alen)
+                        return XFS_ERROR(EINVAL);
+                /*
+                 * Try to fix it by moving the start up.
+                 */
+                if (align_off + temp <= orig_off) {
+                        align_alen -= temp;
+                        align_off += temp;
+                }
+                /*
+                 * Try to fix it by moving the end in.
+                 */
+                else if (align_off + align_alen - temp >= orig_end)
+                        align_alen -= temp;
+                /*
+                 * Set the start to the minimum then trim the length.
+                 */
+                else {
+                        align_alen -= orig_off - align_off;
+                        align_off = orig_off;
+                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
+                }
+                /*
+                 * Result doesn't cover the request, fail it.
+                 */
+                if (orig_off < align_off || orig_end > align_off + align_alen)
+                        return XFS_ERROR(EINVAL);
+        } else {
+                ASSERT(orig_off >= align_off);
+                ASSERT(orig_end <= align_off + align_alen);
+        }
+#ifdef DEBUG
+        if (!eof && gotp->br_startoff != NULLFILEOFF)
+                ASSERT(align_off + align_alen <= gotp->br_startoff);
+        if (prevp->br_startoff != NULLFILEOFF)
+                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+        *lenp = align_alen;
+        *offp = align_off;
+        return 0;
+}
 #define XFS_ALLOC_GAP_UNITS     4
 /*
 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
 * It figures out where to ask the underlying allocator to put the new extent.
 */
-STATIC int                              /* error */
+STATIC int
 xfs_bmap_alloc(
        xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
 {
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
        xfs_mount_t     *mp;            /* mount point structure */
        int             nullfb;         /* true if ap->firstblock isn't set */
        int             rt;             /* true if inode is realtime */
-#ifdef __KERNEL__
+        xfs_extlen_t    prod = 0;       /* product factor for allocators */
-        xfs_extlen_t    prod=0;         /* product factor for allocators */
+        xfs_extlen_t    ralen = 0;      /* realtime allocation length */
-        xfs_extlen_t    ralen=0;        /* realtime allocation length */
+        xfs_extlen_t    align;          /* minimum allocation alignment */
-#endif
+        xfs_rtblock_t   rtx;
 #define ISVALID(x,y)    \
        (rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
        nullfb = ap->firstblock == NULLFSBLOCK;
        rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
        if (rt) {
-                xfs_extlen_t    extsz;          /* file extent size for rt */
+                align = ap->ip->i_d.di_extsize ?
-                xfs_fileoff_t   nexto;          /* next file offset */
+                        ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
-                xfs_extlen_t    orig_alen;      /* original ap->alen */
+                /* Set prod to match the extent size */
-                xfs_fileoff_t   orig_end;       /* original off+len */
+                prod = align / mp->m_sb.sb_rextsize;
-                xfs_fileoff_t   orig_off;       /* original ap->off */
-                xfs_extlen_t    mod_off;        /* modulus calculations */
+                error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
-                xfs_fileoff_t   prevo;          /* previous file offset */
+                                                align, rt, ap->eof, 0,
-                xfs_rtblock_t   rtx;            /* realtime extent number */
+                                                ap->conv, &ap->off, &ap->alen);
-                xfs_extlen_t    temp;           /* temp for rt calculations */
+                if (error)
+                        return error;
-                /*
+                ASSERT(ap->alen);
-                 * Set prod to match the realtime extent size.
-                 */
-                if (!(extsz = ap->ip->i_d.di_extsize))
-                        extsz = mp->m_sb.sb_rextsize;
-                prod = extsz / mp->m_sb.sb_rextsize;
-                orig_off = ap->off;
-                orig_alen = ap->alen;
-                orig_end = orig_off + orig_alen;
-                /*
-                 * If the file offset is unaligned vs. the extent size
-                 * we need to align it.  This will be possible unless
-                 * the file was previously written with a kernel that didn't
-                 * perform this alignment.
-                 */
-                mod_off = do_mod(orig_off, extsz);
-                if (mod_off) {
-                        ap->alen += mod_off;
-                        ap->off -= mod_off;
-                }
-                /*
-                 * Same adjustment for the end of the requested area.
-                 */
-                if ((temp = (ap->alen % extsz)))
-                        ap->alen += extsz - temp;
-                /*
-                 * If the previous block overlaps with this proposed allocation
-                 * then move the start forward without adjusting the length.
-                 */
-                prevo =
-                        ap->prevp->br_startoff == NULLFILEOFF ?
-                                0 :
-                                (ap->prevp->br_startoff +
-                                 ap->prevp->br_blockcount);
-                if (ap->off != orig_off && ap->off < prevo)
-                        ap->off = prevo;
-                /*
-                 * If the next block overlaps with this proposed allocation
-                 * then move the start back without adjusting the length,
-                 * but not before offset 0.
-                 * This may of course make the start overlap previous block,
-                 * and if we hit the offset 0 limit then the next block
-                 * can still overlap too.
-                 */
-                nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
-                        NULLFILEOFF : ap->gotp->br_startoff;
-                if (!ap->eof &&
-                    ap->off + ap->alen != orig_end &&
-                    ap->off + ap->alen > nexto)
-                        ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
-                /*
-                 * If we're now overlapping the next or previous extent that
-                 * means we can't fit an extsz piece in this hole.  Just move
-                 * the start forward to the first valid spot and set
-                 * the length so we hit the end.
-                 */
-                if ((ap->off != orig_off && ap->off < prevo) ||
-                    (ap->off + ap->alen != orig_end &&
-                     ap->off + ap->alen > nexto)) {
-                        ap->off = prevo;
-                        ap->alen = nexto - prevo;
-                }
-                /*
-                 * If the result isn't a multiple of rtextents we need to
-                 * remove blocks until it is.
-                 */
-                if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
-                        /*
-                         * We're not covering the original request, or
-                         * we won't be able to once we fix the length.
-                         */
-                        if (orig_off < ap->off ||
-                            orig_end > ap->off + ap->alen ||
-                            ap->alen - temp < orig_alen)
-                                return XFS_ERROR(EINVAL);
-                        /*
-                         * Try to fix it by moving the start up.
-                         */
-                        if (ap->off + temp <= orig_off) {
-                                ap->alen -= temp;
-                                ap->off += temp;
-                        }
-                        /*
-                         * Try to fix it by moving the end in.
-                         */
-                        else if (ap->off + ap->alen - temp >= orig_end)
-                                ap->alen -= temp;
-                        /*
-                         * Set the start to the minimum then trim the length.
-                         */
-                        else {
-                                ap->alen -= orig_off - ap->off;
-                                ap->off = orig_off;
-                                ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
-                        }
-                        /*
-                         * Result doesn't cover the request, fail it.
-                         */
-                        if (orig_off < ap->off || orig_end > ap->off + ap->alen)
-                                return XFS_ERROR(EINVAL);
-                }
                ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
                /*
                 * If the offset & length are not perfectly aligned
                 * then kill prod, it will just get us in trouble.
                 */
-                if (do_mod(ap->off, extsz) || ap->alen % extsz)
+                if (do_mod(ap->off, align) || ap->alen % align)
                        prod = 1;
                /*
                 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
                        ap->rval = rtx * mp->m_sb.sb_rextsize;
                } else
                        ap->rval = 0;
+        } else {
+                align = (ap->userdata && ap->ip->i_d.di_extsize &&
+                        (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+                        ap->ip->i_d.di_extsize : 0;
+                if (unlikely(align)) {
+                        error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+                                                        align, rt,
+                                                        ap->eof, 0, ap->conv,
+                                                        &ap->off, &ap->alen);
+                        ASSERT(!error);
+                        ASSERT(ap->alen);
+                }
+                if (nullfb)
+                        ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+                else
+                        ap->rval = ap->firstblock;
        }
-#else
-        if (rt)
-                ap->rval = 0;
-#endif  /* __KERNEL__ */
-        else if (nullfb)
-                ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-        else
-                ap->rval = ap->firstblock;
        /*
         * If allocating at eof, and there's a previous real block,
         * try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
                        args.total = ap->total;
                        args.minlen = ap->minlen;
                }
-                if (ap->ip->i_d.di_extsize) {
+                if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+                            (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
                        args.prod = ap->ip->i_d.di_extsize;
                        if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
                                args.mod = (xfs_extlen_t)(args.prod - args.mod);
-                } else if (mp->m_sb.sb_blocksize >= NBPP) {
+                } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
                        args.prod = 1;
                        args.mod = 0;
                } else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
        ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
                                          lastxp, gotp, prevp);
-        rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;
+        rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-        if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {
+        if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
                cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
                        "start_block : %llx start_off : %llx blkcnt : %llx "
                        "extent-state : %x \n",
-                        (ip->i_mount)->m_fsname,(long long)ip->i_ino,
+                        (ip->i_mount)->m_fsname, (long long)ip->i_ino,
-                        gotp->br_startblock, gotp->br_startoff,
+                        (unsigned long long)gotp->br_startblock,
-                        gotp->br_blockcount,gotp->br_state);
+                        (unsigned long long)gotp->br_startoff,
+                        (unsigned long long)gotp->br_blockcount,
+                        gotp->br_state);
        }
        return ep;
 }
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
                ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
                if (!ip->i_d.di_forkoff)
                        ip->i_d.di_forkoff = mp->m_attroffset >> 3;
-                else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+                else if (mp->m_flags & XFS_MOUNT_ATTR2)
                        version = 2;
                break;
        default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
         */
        if (whichfork == XFS_DATA_FORK) {
                maxleafents = MAXEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
-                        mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+                        XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
        } else {
                maxleafents = MAXAEXTNUM;
-                sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+                sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
-                        mp->m_sb.sb_inodesize - mp->m_attroffset :
+                        XFS_BMDR_SPACE_CALC(MINABTPTRS) :
-                        XFS_BMDR_SPACE_CALC(MINABTPTRS);
+                        mp->m_sb.sb_inodesize - mp->m_attroffset;
        }
        maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
        minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
                num_recs = be16_to_cpu(block->bb_numrecs);
                if (unlikely(i + num_recs > room)) {
                        ASSERT(i + num_recs <= room);
-                        xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                        xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                "corrupt dinode %Lu, (btree extents).  Unmount and run xfs_repair.",
+                                "corrupt dinode %Lu, (btree extents).",
                                (unsigned long long) ip->i_ino);
                        XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
                                         XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
        char            contig;         /* allocation must be one extent */
        char            delay;          /* this request is for delayed alloc */
        char            exact;          /* don't do all of wasdelayed extent */
+        char            convert;        /* unwritten extent I/O completion */
        xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
        int             error;          /* error return */
        xfs_bmbt_irec_t got;            /* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
        }
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        rt = XFS_IS_REALTIME_INODE(ip);
+        rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(ifp->if_ext_max ==
               XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
        delay = (flags & XFS_BMAPI_DELAY) != 0;
        trim = (flags & XFS_BMAPI_ENTIRE) == 0;
        userdata = (flags & XFS_BMAPI_METADATA) == 0;
+        convert = (flags & XFS_BMAPI_CONVERT) != 0;
        exact = (flags & XFS_BMAPI_EXACT) != 0;
        rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
        contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
                        }
                        minlen = contig ? alen : 1;
                        if (delay) {
-                                xfs_extlen_t    extsz = 0;
+                                xfs_extlen_t    extsz;
                                /* Figure out the extent size, adjust alen */
                                if (rt) {
                                        if (!(extsz = ip->i_d.di_extsize))
                                                extsz = mp->m_sb.sb_rextsize;
-                                        alen = roundup(alen, extsz);
+                                } else {
-                                        extsz = alen / mp->m_sb.sb_rextsize;
+                                        extsz = ip->i_d.di_extsize;
                                }
+                                if (extsz) {
+                                        error = xfs_bmap_extsize_align(mp,
+                                                        &got, &prev, extsz,
+                                                        rt, eof, delay, convert,
+                                                        &aoff, &alen);
+                                        ASSERT(!error);
+                                }
+                                if (rt)
+                                        extsz = alen / mp->m_sb.sb_rextsize;
                                /*
                                 * Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
                                        xfs_bmap_worst_indlen(ip, alen);
                                ASSERT(indlen > 0);
-                                if (rt)
+                                if (rt) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FREXTENTS,
                                                        -(extsz), rsvd);
-                                else
+                                } else {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(alen), rsvd);
+                                }
                                if (!error) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(indlen), rsvd);
-                                        if (error && rt) {
+                                        if (error && rt)
-                                                xfs_mod_incore_sb(ip->i_mount,
+                                                xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FREXTENTS,
                                                        extsz, rsvd);
-                                        } else if (error) {
+                                        else if (error)
-                                                xfs_mod_incore_sb(ip->i_mount,
+                                                xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        alen, rsvd);
-                                        }
                                }
                                if (error) {
-                                        if (XFS_IS_QUOTA_ON(ip->i_mount))
+                                        if (XFS_IS_QUOTA_ON(mp))
                                                /* unreserve the blocks now */
+                                                (void)
                                                XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
                                                        mp, NULL, ip,
                                                        (long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
                                bma.firstblock = *firstblock;
                                bma.alen = alen;
                                bma.off = aoff;
+                                bma.conv = convert;
                                bma.wasdel = wasdelay;
                                bma.minlen = minlen;
                                bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
                return 0;
        }
        XFS_STATS_INC(xs_blk_unmap);
-        isrt = (whichfork == XFS_DATA_FORK) &&
+        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-               (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
        start = bno;
        bno = start + len - 1;
        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
                }
                if (wasdel) {
                        ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-                        /* Update realtim/data freespace, unreserve quota */
+                        /* Update realtime/data freespace, unreserve quota */
                        if (isrt) {
                                xfs_filblks_t rtexts;
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
                                do_div(rtexts, mp->m_sb.sb_rextsize);
                                xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
                                                (int)rtexts, rsvd);
-                                XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+                                (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-                                        -((long)del.br_blockcount), 0,
+                                        NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
                                xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
                                                (int)del.br_blockcount, rsvd);
-                                XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+                                (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-                                        -((long)del.br_blockcount), 0,
+                                        NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
                        }
                        ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
                   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
                return XFS_ERROR(EINVAL);
        if (whichfork == XFS_DATA_FORK) {
-                if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+                if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+                                (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+                    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
                        prealloced = 1;
                        fixlen = XFS_MAXIOFFSET(mp);
                } else {
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a01309..12cc63dfc2c4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef	struct xfs_bmap_free
 #define XFS_BMAPI_IGSTATE       0x200   /* Ignore state - */
                                        /* combine contig. space */
 #define XFS_BMAPI_CONTIG        0x400   /* must allocate only one extent */
+/*      XFS_BMAPI_DIRECT_IO     0x800   */
+#define XFS_BMAPI_CONVERT       0x1000  /* unwritten extent conversion - */
+                                        /* need write cache flushing and no */
+                                        /* additional allocation alignments */
 #define XFS_BMAPI_AFLAG(w)      xfs_bmapi_aflag(w)
 static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
        char                    wasdel; /* replacing a delayed allocation */
        char                    userdata;/* set if is user data */
        char                    low;    /* low on space, using seq'l ags */
-        char                    aeof;   /* allocated space at eof */
+        char                    aeof;   /* allocated space at eof */
+        char                    conv;   /* overwriting unwritten extents */
 } xfs_bmalloca_t;
 #ifdef __KERNEL__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 328a528b926d..f57cc9ac875e 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -57,7 +57,7 @@ struct xfs_mount_args {
 /*
 * XFS mount option flags -- args->flags1
 */
-#define XFSMNT_COMPAT_ATTR      0x00000001      /* do not use ATTR2 format */
+#define XFSMNT_ATTR2            0x00000001      /* allow ATTR2 EA format */
 #define XFSMNT_WSYNC            0x00000002      /* safe mode nfs mount
                                                 * compatible */
 #define XFSMNT_INO64            0x00000004      /* move inode numbers up
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 070259a4254c..c6191d00ad27 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -60,8 +60,6 @@ xfs_swapext(
        xfs_bstat_t     *sbp;
        struct file     *fp = NULL, *tfp = NULL;
        vnode_t         *vp, *tvp;
-        bhv_desc_t      *bdp, *tbdp;
-        vn_bhv_head_t   *bhp, *tbhp;
        static uint     lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
        int             ilf_fields, tilf_fields;
        int             error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
                goto error0;
        }
-        bhp = VN_BHV_HEAD(vp);
+        ip = xfs_vtoi(vp);
-        bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
+        if (ip == NULL) {
-        if (bdp == NULL) {
                error = XFS_ERROR(EBADF);
                goto error0;
-        } else {
-                ip = XFS_BHVTOI(bdp);
        }
        if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
                goto error0;
        }
-        tbhp = VN_BHV_HEAD(tvp);
+        tip = xfs_vtoi(tvp);
-        tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
+        if (tip == NULL) {
-        if (tbdp == NULL) {
                error = XFS_ERROR(EBADF);
                goto error0;
-        } else {
-                tip = XFS_BHVTOI(tbdp);
        }
        if (ip->i_mount != tip->i_mount) {
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1a..79d0d9e1fbab 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
 #define XFS_DFORK_DSIZE(dip,mp) \
        XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_DSIZE_HOST(dip,mp) \
+        XFS_CFORK_DSIZE(&(dip)->di_core, mp)
 #define XFS_DFORK_ASIZE(dip,mp) \
        XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_ASIZE_HOST(dip,mp) \
+        XFS_CFORK_ASIZE(&(dip)->di_core, mp)
 #define XFS_DFORK_SIZE(dip,mp,w) \
        XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
+#define XFS_DFORK_SIZE_HOST(dip,mp,w) \
+        XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
 #define XFS_DFORK_Q(dip)                    XFS_CFORK_Q_DISK(&(dip)->di_core)
 #define XFS_DFORK_BOFF(dip)                 XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_CFORK_FMT_SET(dcp,w,n) \
        ((w) == XFS_DATA_FORK ? \
                ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
+#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
 #define XFS_CFORK_NEXTENTS_DISK(dcp,w) \
        ((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
                INT_GET((dcp)->di_anextents, ARCH_CONVERT))
 #define XFS_CFORK_NEXTENTS(dcp,w) \
        ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
+#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
+#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
 #define XFS_CFORK_NEXT_SET(dcp,w,n) \
        ((w) == XFS_DATA_FORK ? \
                ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
-#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
 #define XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)XFS_BUF_PTR(bp))
 /*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOATIME_BIT   6      /* do not update atime */
 #define XFS_DIFLAG_NODUMP_BIT    7      /* do not dump */
 #define XFS_DIFLAG_RTINHERIT_BIT 8      /* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT  9   /* create with parents projid */
+#define XFS_DIFLAG_PROJINHERIT_BIT   9  /* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT  10   /* disallow symlink creation */
+#define XFS_DIFLAG_NOSYMLINKS_BIT   10  /* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT      11  /* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12  /* inherit inode extent size */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_RTINHERIT     (1 << XFS_DIFLAG_RTINHERIT_BIT)
 #define XFS_DIFLAG_PROJINHERIT   (1 << XFS_DIFLAG_PROJINHERIT_BIT)
 #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
         XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
-         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+         XFS_DIFLAG_EXTSZINHERIT)
 #endif  /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 3dd30391f551..bb87d2a700a9 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
        uint shortcount, leafcount, count;
        mp->m_dirversion = 1;
-        if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) {
+        if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
                shortcount = (mp->m_attroffset -
                                (uint)sizeof(xfs_dir_sf_hdr_t)) /
                                 (uint)sizeof(xfs_dir_sf_entry_t);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 488defe86ba6..8cc8afb9f6c0 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -135,6 +135,8 @@ void	xfs_dir_startup(void);	/* called exactly once */
        ((mp)->m_dirops.xd_shortform_to_single(args))
 #define XFS_DIR_IS_V1(mp)       ((mp)->m_dirversion == 1)
+#define XFS_DIR_IS_V2(mp)       ((mp)->m_dirversion == 2)
 extern xfs_dirops_t xfsv1_dirops;
+extern xfs_dirops_t xfsv2_dirops;
 #endif  /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7e24ffeda9e1..3158f5dc431f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
        struct uio      *uio;           /* uio control structure */
 } xfs_dir2_put_args_t;
-#define XFS_DIR_IS_V2(mp)       ((mp)->m_dirversion == 2)
-extern xfs_dirops_t     xfsv2_dirops;
 /*
 * Other interfaces used by the rest of the dir v2 code.
 */
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 950df31efc46..e83074016abb 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -147,7 +147,7 @@ xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
        hdr->count = 0;
        dp->i_d.di_size = sizeof(*hdr);
        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-        return(0);
+        return 0;
 }
 /*
@@ -180,7 +180,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
                if (sfe->namelen == args->namelen &&
                    args->name[0] == sfe->name[0] &&
                    memcmp(args->name, sfe->name, args->namelen) == 0)
-                        return(XFS_ERROR(EEXIST));
+                        return XFS_ERROR(EEXIST);
                sfe = XFS_DIR_SF_NEXTENTRY(sfe);
        }
@@ -198,7 +198,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
        dp->i_d.di_size += size;
        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-        return(0);
+        return 0;
 }
 /*
@@ -238,7 +238,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
        }
        if (i < 0) {
                ASSERT(args->oknoent);
-                return(XFS_ERROR(ENOENT));
+                return XFS_ERROR(ENOENT);
        }
        if ((base + size) != dp->i_d.di_size) {
@@ -251,7 +251,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
        dp->i_d.di_size -= size;
        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-        return(0);
+        return 0;
 }
 /*
@@ -390,7 +390,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
 out:
        kmem_free(tmpbuffer, size);
-        return(retval);
+        return retval;
 }
 STATIC int
@@ -596,7 +596,7 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
                /* XXX - replace assert? */
                XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
                xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
-                return(0);
+                return 0;
        }
        ASSERT(args->namelen != 1 || args->name[0] != '.');
        sfe = &sf->list[0];
@@ -608,12 +608,12 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
                                (char *)&sfe->inumber, sizeof(xfs_ino_t)));
                        XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
                        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
-                        return(0);
+                        return 0;
                }
                sfe = XFS_DIR_SF_NEXTENTRY(sfe);
        }
        ASSERT(args->oknoent);
-        return(XFS_ERROR(ENOENT));
+        return XFS_ERROR(ENOENT);
 }
 /*
@@ -695,7 +695,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 out:
        kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
-        return(retval);
+        return retval;
 }
 /*
@@ -715,17 +715,17 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
        retval = xfs_da_grow_inode(args, &blkno);
        ASSERT(blkno == 1);
        if (retval)
-                return(retval);
+                return retval;
        retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
                                              XFS_DATA_FORK);
        if (retval)
-                return(retval);
+                return retval;
        ASSERT(bp1 != NULL);
        retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
                                             XFS_DATA_FORK);
        if (retval) {
                xfs_da_buf_done(bp1);
-                return(retval);
+                return retval;
        }
        ASSERT(bp2 != NULL);
        memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
@@ -738,7 +738,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
        retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
        if (retval) {
                xfs_da_buf_done(bp2);
-                return(retval);
+                return retval;
        }
        node = bp1->data;
        leaf = bp2->data;
@@ -751,7 +751,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
                XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
        xfs_da_buf_done(bp1);
-        return(retval);
+        return retval;
 }
@@ -776,7 +776,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
        ASSERT(dp != NULL);
        retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
        if (retval)
-                return(retval);
+                return retval;
        ASSERT(bp != NULL);
        leaf = bp->data;
        memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
@@ -791,7 +791,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
        xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
        *bpp = bp;
-        return(0);
+        return 0;
 }
 /*
@@ -813,10 +813,10 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
        ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
        error = xfs_da_grow_inode(args, &blkno);
        if (error)
-                return(error);
+                return error;
        error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
        if (error)
-                return(error);
+                return error;
        newblk->blkno = blkno;
        newblk->magic = XFS_DIR_LEAF_MAGIC;
@@ -826,7 +826,7 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
        xfs_dir_leaf_rebalance(state, oldblk, newblk);
        error = xfs_da_blk_link(state, oldblk, newblk);
        if (error)
-                return(error);
+                return error;
        /*
         * Insert the new entry in the correct block.
@@ -842,7 +842,7 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
         */
        oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
        newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
-        return(error);
+        return error;
 }
 /*
@@ -885,7 +885,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
                if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
                        if (!args->justcheck)
                                xfs_dir_leaf_add_work(bp, args, index, i);
-                        return(0);
+                        return 0;
                }
                sum += INT_GET(map->size, ARCH_CONVERT);
        }
@@ -896,7 +896,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
         * no good and we should just give up.
         */
        if (!hdr->holes && (sum < entsize))
-                return(XFS_ERROR(ENOSPC));
+                return XFS_ERROR(ENOSPC);
        /*
         * Compact the entries to coalesce free space.
@@ -909,18 +909,18 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
                                (uint)sizeof(xfs_dir_leaf_entry_t) : 0,
                        args->justcheck);
        if (error)
-                return(error);
+                return error;
        /*
         * After compaction, the block is guaranteed to have only one
         * free region, in freemap[0].  If it is not big enough, give up.
         */
        if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
            (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
-                return(XFS_ERROR(ENOSPC));
+                return XFS_ERROR(ENOSPC);
        if (!args->justcheck)
                xfs_dir_leaf_add_work(bp, args, index, 0);
-        return(0);
+        return 0;
 }
 /*
@@ -1072,7 +1072,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
        kmem_free(tmpbuffer, lbsize);
        if (musthave || justcheck)
                kmem_free(tmpbuffer2, lbsize);
-        return(rval);
+        return rval;
 }
 /*
@@ -1292,7 +1292,7 @@ xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
        *countarg = count;
        *namebytesarg = totallen;
-        return(foundit);
+        return foundit;
 }
 /*========================================================================
@@ -1334,7 +1334,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
                INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
        if (bytes > (state->blocksize >> 1)) {
                *action = 0;    /* blk over 50%, don't try to join */
-                return(0);
+                return 0;
        }
        /*
@@ -1353,13 +1353,13 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
                error = xfs_da_path_shift(state, &state->altpath, forward,
                                                 0, &retval);
                if (error)
-                        return(error);
+                        return error;
                if (retval) {
                        *action = 0;
                } else {
                        *action = 2;
                }
-                return(0);
+                return 0;
        }
        /*
@@ -1381,7 +1381,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
                                                            blkno, -1, &bp,
                                                            XFS_DATA_FORK);
                if (error)
-                        return(error);
+                        return error;
                ASSERT(bp != NULL);
                leaf = (xfs_dir_leafblock_t *)info;
@@ -1402,7 +1402,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
        }
        if (i >= 2) {
                *action = 0;
-                return(0);
+                return 0;
        }
        xfs_da_buf_done(bp);
@@ -1419,13 +1419,13 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
                                                 0, &retval);
        }
        if (error)
-                return(error);
+                return error;
        if (retval) {
                *action = 0;
        } else {
                *action = 1;
        }
-        return(0);
+        return 0;
 }
 /*
@@ -1575,8 +1575,8 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
        tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
        tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
        if (tmp < mp->m_dir_magicpct)
-                return(1);                      /* leaf is < 37% full */
+                return 1;                       /* leaf is < 37% full */
-        return(0);
+        return 0;
 }
 /*
@@ -1732,7 +1732,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
        if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
                *index = probe;
                ASSERT(args->oknoent);
-                return(XFS_ERROR(ENOENT));
+                return XFS_ERROR(ENOENT);
        }
        /*
@@ -1745,14 +1745,14 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
                    memcmp(args->name, namest->name, args->namelen) == 0) {
                        XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
                        *index = probe;
-                        return(XFS_ERROR(EEXIST));
+                        return XFS_ERROR(EEXIST);
                }
                entry++;
                probe++;
        }
        *index = probe;
        ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
-        return(XFS_ERROR(ENOENT));
+        return XFS_ERROR(ENOENT);
 }
 /*========================================================================
@@ -1890,9 +1890,9 @@ xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
              INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
             (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
              INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
-                return(1);
+                return 1;
        }
-        return(0);
+        return 0;
 }
 /*
@@ -1942,7 +1942,7 @@ xfs_dir_leaf_getdents_int(
        leaf = bp->data;
        if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
                *eobp = 1;
-                return(XFS_ERROR(ENOENT));      /* XXX wrong code */
+                return XFS_ERROR(ENOENT);       /* XXX wrong code */
        }
        want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
@@ -2000,7 +2000,7 @@ xfs_dir_leaf_getdents_int(
                 * the node code will be setting uio_offset anyway.
                 */
                *eobp = 0;
-                return(0);
+                return 0;
        }
        xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
@@ -2057,7 +2057,7 @@ xfs_dir_leaf_getdents_int(
                        retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
                                                 nextda, &bp2, XFS_DATA_FORK);
                        if (retval)
-                                return(retval);
+                                return retval;
                        ASSERT(bp2 != NULL);
@@ -2073,7 +2073,7 @@ xfs_dir_leaf_getdents_int(
                                                     leaf2);
                                xfs_da_brelse(dp->i_transp, bp2);
-                                return(XFS_ERROR(EFSCORRUPTED));
+                                return XFS_ERROR(EFSCORRUPTED);
                        }
                        nexthash = INT_GET(leaf2->entries[0].hashval,
@@ -2139,7 +2139,7 @@ xfs_dir_leaf_getdents_int(
                        xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
-                        return(retval);
+                        return retval;
                }
        }
@@ -2149,7 +2149,7 @@ xfs_dir_leaf_getdents_int(
        xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
-        return(0);
+        return 0;
 }
 /*
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index ab6b09eef9ab..eb8cd9a4667f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -67,34 +67,38 @@ struct xfs_trans;
 */
 #define XFS_DIR_LEAF_MAPSIZE    3       /* how many freespace slots */
+typedef struct xfs_dir_leaf_map {       /* one entry of RLE free-byte map */
+        __uint16_t      base;           /* base of free region */
+        __uint16_t      size;           /* run length of free region */
+} xfs_dir_leaf_map_t;
+typedef struct xfs_dir_leaf_hdr {       /* constant-structure header block */
+        xfs_da_blkinfo_t info;          /* block type, links, etc. */
+        __uint16_t      count;          /* count of active leaf entries */
+        __uint16_t      namebytes;      /* num bytes of name strings stored */
+        __uint16_t      firstused;      /* first used byte in name area */
+        __uint8_t       holes;          /* != 0 if blk needs compaction */
+        __uint8_t       pad1;
+        xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
+} xfs_dir_leaf_hdr_t;
+typedef struct xfs_dir_leaf_entry {     /* sorted on key, not name */
+        xfs_dahash_t    hashval;        /* hash value of name */
+        __uint16_t      nameidx;        /* index into buffer of name */
+        __uint8_t       namelen;        /* length of name string */
+        __uint8_t       pad2;
+} xfs_dir_leaf_entry_t;
+typedef struct xfs_dir_leaf_name {      /* inumber + name for one entry */
+        xfs_dir_ino_t   inumber;        /* inode number for this key */
+        __uint8_t       name[1];        /* name string itself (var length) */
+} xfs_dir_leaf_name_t;
 typedef struct xfs_dir_leafblock {
-        struct xfs_dir_leaf_hdr {       /* constant-structure header block */
+        xfs_dir_leaf_hdr_t      hdr;    /* constant-structure header block */
-                xfs_da_blkinfo_t info;  /* block type, links, etc. */
+        xfs_dir_leaf_entry_t    entries[1];     /* var sized array */
-                __uint16_t count;       /* count of active leaf_entry's */
+        xfs_dir_leaf_name_t     namelist[1];    /* grows from bottom of buf */
-                __uint16_t namebytes;   /* num bytes of name strings stored */
-                __uint16_t firstused;   /* first used byte in name area */
-                __uint8_t  holes;       /* != 0 if blk needs compaction */
-                __uint8_t  pad1;
-                struct xfs_dir_leaf_map {/* RLE map of free bytes */
-                        __uint16_t base; /* base of free region */
-                        __uint16_t size; /* run length of free region */
-                } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
-        } hdr;
-        struct xfs_dir_leaf_entry {     /* sorted on key, not name */
-                xfs_dahash_t hashval;   /* hash value of name */
-                __uint16_t nameidx;     /* index into buffer of name */
-                __uint8_t namelen;      /* length of name string */
-                __uint8_t pad2;
-        } entries[1];                   /* var sized array */
-        struct xfs_dir_leaf_name {
-                xfs_dir_ino_t inumber;  /* inode number for this key */
-                __uint8_t name[1];      /* name string itself */
-        } namelist[1];                  /* grows from bottom of buf */
 } xfs_dir_leafblock_t;
-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
 /*
 * Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
 #define XFS_PUT_COOKIE(c,mp,bno,entry,hash)     \
        ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
-typedef struct xfs_dir_put_args
+typedef struct xfs_dir_put_args {
-{
        xfs_dircook_t   cook;           /* cookie of (next) entry */
        xfs_intino_t    ino;            /* inode number */
-        struct xfs_dirent       *dbp;           /* buffer pointer */
+        struct xfs_dirent *dbp;         /* buffer pointer */
        char            *name;          /* directory entry name */
        int             namelen;        /* length of name */
        int             done;           /* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
        struct uio      *uio;           /* uio control structure */
 } xfs_dir_put_args_t;
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)        xfs_dir_leaf_entsize_byname(len)
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)        \
+        xfs_dir_leaf_entsize_byname(len)
 static inline int xfs_dir_leaf_entsize_byname(int len)
 {
        return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 864bf6955689..b4c7f2bc55a0 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -152,7 +152,7 @@ typedef enum {
 #define DM_FLAGS_NDELAY         0x001   /* return EAGAIN after dm_pending() */
 #define DM_FLAGS_UNWANTED       0x002   /* event not in fsys dm_eventset_t */
-#define DM_FLAGS_ISEM           0x004   /* thread holds i_sem */
+#define DM_FLAGS_IMUX           0x004   /* thread holds i_mutex */
 #define DM_FLAGS_IALLOCSEM_RD   0x010   /* thread holds i_alloc_sem rd */
 #define DM_FLAGS_IALLOCSEM_WR   0x020   /* thread holds i_alloc_sem wr */
@@ -161,21 +161,21 @@ typedef enum {
 */
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-                              DM_FLAGS_ISEM : 0)
+                              DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR  (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+#define DM_SEM_FLAG_WR  (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
 #endif
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-                              DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM)
+                              DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR  (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+#define DM_SEM_FLAG_WR  (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
 #endif
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-                              0 : DM_FLAGS_ISEM)
+                              0 : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR  (DM_FLAGS_ISEM)
+#define DM_SEM_FLAG_WR  (DM_FLAGS_IMUX)
 #endif
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index d7b6b5d16704..2a21c5024017 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -54,7 +54,6 @@ xfs_error_trap(int e)
                if (e != xfs_etrap[i])
                        continue;
                cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
-                debug_stop_all_cpus((void *)-1LL);
                BUG();
                break;
        }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 06d8a8426c16..26b8e709a569 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,9 +18,6 @@
 #ifndef __XFS_ERROR_H__
 #define __XFS_ERROR_H__
-#define prdev(fmt,targ,args...) \
-        printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 #define XFS_ERECOVER    1       /* Failure to recover log */
 #define XFS_ELOGSTAT    2       /* Failure to stat log in user space */
 #define XFS_ENOLOGSPACE 3       /* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
 struct xfs_mount;
 /* PRINTFLIKE4 */
 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
-                            char *fmt, ...);
+                        char *fmt, ...);
 /* PRINTFLIKE3 */
 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
+        xfs_fs_cmn_err(level, mp, fmt "  Unmount and run xfs_repair.", ## args)
 #endif  /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48d..14010f1fa82f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -3,15 +3,15 @@
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
+ * modify it under the terms of the GNU Lesser General Public License
- * published by the Free Software Foundation.
+ * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * GNU Lesser General Public License for more details.
 *
- * You should have received a copy of the GNU General Public License
+ * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
@@ -65,6 +65,8 @@ struct fsxattr {
 #define XFS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
 #define XFS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
 #define XFS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d1236d6f4045..b4d971b01588 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -501,7 +501,7 @@ xfs_reserve_blocks(
        if (inval == (__uint64_t *)NULL) {
                outval->resblks = mp->m_resblks;
                outval->resblks_avail = mp->m_resblks_avail;
-                return(0);
+                return 0;
        }
        request = *inval;
@@ -537,7 +537,33 @@ xfs_reserve_blocks(
        outval->resblks = mp->m_resblks;
        outval->resblks_avail = mp->m_resblks_avail;
        XFS_SB_UNLOCK(mp, s);
-        return(0);
+        return 0;
+}
+void
+xfs_fs_log_dummy(xfs_mount_t *mp)       /* log the core of mp's root inode
+                                         * in a dummy transaction; nothing
+                                         * is returned, so a failed
+                                         * reservation is not reported */
+{
+        xfs_trans_t *tp;
+        xfs_inode_t *ip;
+        tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+        atomic_inc(&mp->m_active_trans);        /* NOTE(review): bumped before
+                                                 * the reservation; confirm the
+                                                 * cancel path below drops it */
+        if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
+                xfs_trans_cancel(tp, 0);        /* reservation failed: give up */
+                return;
+        }
+        ip = mp->m_rootip;
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+        xfs_trans_ihold(tp, ip);        /* presumably keeps ip locked across
+                                         * the commit - TODO confirm */
+        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+        xfs_trans_set_sync(tp);
+        xfs_trans_commit(tp, 0, NULL);  /* result, if any, is ignored here */
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);        /* drop the lock taken above */
 }
 int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f32713f14f9a..300d0c9d61ad 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
                                xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern void xfs_fs_log_dummy(xfs_mount_t *mp);
 #endif  /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fc19eedbd11b..8e380a1fb79b 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -493,7 +493,6 @@ xfs_iget(
 retry:
        if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
-                bhv_desc_t      *bdp;
                xfs_inode_t     *ip;
                vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
                         * to wait for the inode to go away.
                         */
                        if (is_bad_inode(inode) ||
-                            ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
+                            ((ip = xfs_vtoi(vp)) == NULL)) {
-                                                  &xfs_vnodeops)) == NULL)) {
                                iput(inode);
                                delay(1);
                                goto retry;
                        }
-                        ip = XFS_BHVTOI(bdp);
                        if (lock_flags != 0)
                                xfs_ilock(ip, lock_flags);
                        XFS_STATS_INC(xs_ig_found);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70a..1d7f5a7e063e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -404,9 +404,8 @@ xfs_iformat(
            INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
                INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
            INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt dinode %Lu, extent total = %d, nblocks = %Lu."
+                        "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
-                        "  Unmount and run xfs_repair.",
                        (unsigned long long)ip->i_ino,
                        (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
                            + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
        }
        if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt dinode %Lu, forkoff = 0x%x."
+                        "corrupt dinode %Lu, forkoff = 0x%x.",
-                        "  Unmount and run xfs_repair.",
                        (unsigned long long)ip->i_ino,
                        (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
                XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
                         * no local regular files yet
                         */
                        if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
-                                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                        "corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+                                        "corrupt inode %Lu "
+                                        "(local format for regular file).",
                                        (unsigned long long) ip->i_ino);
                                XFS_CORRUPTION_ERROR("xfs_iformat(4)",
                                                     XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
                        di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
                        if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-                                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                        "corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+                                        "corrupt inode %Lu "
+                                        "(bad size %Ld for local inode).",
                                        (unsigned long long) ip->i_ino,
                                        (long long) di_size);
                                XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
         * kmem_alloc() or memcpy() below.
         */
        if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu "
+                        "(bad size %d for local fork, size = %d).",
                        (unsigned long long) ip->i_ino, size,
                        XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
                XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
         * kmem_alloc() or memcpy() below.
         */
        if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu ((a)extents = %d).",
                        (unsigned long long) ip->i_ino, nex);
                XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
                                     ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
            || XFS_BMDR_SPACE_CALC(nrecs) >
                        XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
            || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-                xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                        "corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+                        "corrupt inode %Lu (btree).",
                        (unsigned long long) ip->i_ino);
                XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
                                 ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
                        flags |= XFS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
                        flags |= XFS_XFLAG_NOSYMLINKS;
+                if (di_flags & XFS_DIFLAG_EXTSIZE)
+                        flags |= XFS_XFLAG_EXTSIZE;
+                if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+                        flags |= XFS_XFLAG_EXTSZINHERIT;
        }
        return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
                        if ((mode & S_IFMT) == S_IFDIR) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
-                        } else {
+                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                                        ip->i_d.di_extsize = pip->i_d.di_extsize;
+                                }
+                        } else if ((mode & S_IFMT) == S_IFREG) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                }
+                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                        di_flags |= XFS_DIFLAG_EXTSIZE;
+                                        ip->i_d.di_extsize = pip->i_d.di_extsize;
+                                }
                        }
                        if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
                            xfs_inherit_noatime)
@@ -1262,7 +1275,7 @@ xfs_isize_check(
        if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
                return;
-        if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+        if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
                return;
        nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
        xfs_fsize_t     new_size,
        cred_t          *credp)
 {
-        xfs_fsize_t     isize;
        int             error;
        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
        ASSERT(new_size > ip->i_d.di_size);
-        error = 0;
-        isize = ip->i_d.di_size;
        /*
         * Zero any pages that may have been created by
         * xfs_write_file() beyond the end of the file
         * and any blocks between the old and new file sizes.
         */
-        error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
+        error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
-                                new_size);
+                             ip->i_d.di_size, new_size);
        return error;
 }
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
        ip->i_update_core = 0;
        SYNCHRONIZE();
+        /*
+         * Make sure to get the latest atime from the Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
                               mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
                xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 124d30e6143b..1cfbcf18ce86 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -436,6 +436,10 @@ void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t     xfs_file_last_byte(xfs_inode_t *);
 void            xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+xfs_inode_t     *xfs_vtoi(struct vnode *vp);
+void            xfs_synchronize_atime(xfs_inode_t *);
 #define xfs_ipincount(ip)       ((unsigned int) atomic_read(&ip->i_pincount))
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7f3363c621e1..36aa1fcb90a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -271,6 +271,11 @@ xfs_inode_item_format(
        if (ip->i_update_size)
                ip->i_update_size = 0;
+        /*
+         * Make sure to get the latest atime from the Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        vecp->i_addr = (xfs_caddr_t)&ip->i_d;
        vecp->i_len  = sizeof(xfs_dinode_core_t);
        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
                if (iip->ili_pushbuf_flag == 0) {
                        iip->ili_pushbuf_flag = 1;
 #ifdef DEBUG
-                        iip->ili_push_owner = get_thread_id();
+                        iip->ili_push_owner = current_pid();
 #endif
                        /*
                         * Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
         * trying to duplicate our effort.
         */
        ASSERT(iip->ili_pushbuf_flag != 0);
-        ASSERT(iip->ili_push_owner == get_thread_id());
+        ASSERT(iip->ili_push_owner == current_pid());
        /*
         * If flushlock isn't locked anymore, chances are that the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 45a77a3a6c07..788917f355c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -40,7 +40,6 @@
 #include "xfs_ialloc.h"
 #include "xfs_btree.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -263,7 +262,7 @@ phase2:
        case BMAPI_WRITE:
                /* If we found an extent, return it */
                if (nimaps &&
-                    (imap.br_startblock != HOLESTARTBLOCK) && 
+                    (imap.br_startblock != HOLESTARTBLOCK) &&
                    (imap.br_startblock != DELAYSTARTBLOCK)) {
                        xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
                                        offset, count, iomapp, &imap, flags);
@@ -318,6 +317,58 @@ out:
 }
 STATIC int
+xfs_iomap_eof_align_last_fsb(   /* returns 0, or the XFS_BMAP_EOF error */
+        xfs_mount_t     *mp,            /* supplies m_swidth/m_dalign/m_flags */
+        xfs_iocore_t    *io,            /* io_flags checked for XFS_IOCORE_RT */
+        xfs_fsize_t     isize,          /* file size, compared in bytes */
+        xfs_extlen_t    extsize,        /* alignment for *last_fsb; 0 = none */
+        xfs_fileoff_t   *last_fsb)      /* in/out: may be raised, never cut */
+{
+        xfs_fileoff_t   new_last_fsb = 0;       /* 0 = no alignment chosen */
+        xfs_extlen_t    align;
+        int             eof, error;
+        if (io->io_flags & XFS_IOCORE_RT)
+                ;       /* XFS_IOCORE_RT set: skip both stripe roundings */
+        /*
+         * If mounted with the "-o swalloc" option, round up the allocation
+         * request to a stripe width boundary if the file size is >=
+         * stripe width and we are allocating past the allocation eof.
+         */
+        else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+                (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+                new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
+        /*
+         * Round up the allocation request to a stripe unit (m_dalign) boundary
+         * if the file size is >= stripe unit size, and we are allocating past
+         * the allocation eof.
+         */
+        else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+                new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+        /*
+         * Always round up the allocation request to an extent boundary
+         * (when the file is on a real-time subvolume or has a di_extsize
+         * hint).
+         *
+         * If a stripe-aligned value was computed above, the alignment used
+         * here is that value rounded up to extsize, not extsize itself.
+         * NOTE(review): align is an xfs_extlen_t but receives a value
+         * derived from an xfs_fileoff_t - confirm it cannot truncate.
+         */
+        if (extsize) {
+                if (new_last_fsb)
+                        align = roundup_64(new_last_fsb, extsize);
+                else
+                        align = extsize;
+                new_last_fsb = roundup_64(*last_fsb, align);
+        }
+        if (new_last_fsb) {
+                error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+                if (error)
+                        return error;
+                if (eof)        /* adopt the aligned value only if eof is set */
+                        *last_fsb = new_last_fsb;
+        }
+        return 0;
+}
+STATIC int
 xfs_flush_space(
        xfs_inode_t     *ip,
        int             *fsynced,
@@ -363,19 +414,20 @@ xfs_iomap_write_direct(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-        xfs_filblks_t   count_fsb;
+        xfs_filblks_t   count_fsb, resaligned;
        xfs_fsblock_t   firstfsb;
+        xfs_extlen_t    extsz, temp;
+        xfs_fsize_t     isize;
        int             nimaps;
-        int             error;
        int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        xfs_bmap_free_t free_list;
-        xfs_filblks_t   qblocks, resblks;
+        uint            qblocks, resblks, resrtextents;
        int             committed;
-        int             resrtextents;
+        int             error;
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -385,38 +437,53 @@ xfs_iomap_write_direct(
        if (error)
                return XFS_ERROR(error);
-        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+        rt = XFS_IS_REALTIME_INODE(ip);
-        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+        if (unlikely(rt)) {
-        count_fsb = last_fsb - offset_fsb;
+                if (!(extsz = ip->i_d.di_extsize))
-        if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
+                        extsz = mp->m_sb.sb_rextsize;
-                xfs_fileoff_t   map_last_fsb;
+        } else {
+                extsz = ip->i_d.di_extsize;
-                map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-                if (map_last_fsb < last_fsb) {
-                        last_fsb = map_last_fsb;
-                        count_fsb = last_fsb - offset_fsb;
-                }
-                ASSERT(count_fsb > 0);
        }
-        /*
+        isize = ip->i_d.di_size;
-         * Determine if reserving space on the data or realtime partition.
+        if (io->io_new_size > isize)
-         */
+                isize = io->io_new_size;
-        if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-                xfs_extlen_t    extsz;
-                if (!(extsz = ip->i_d.di_extsize))
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
-                        extsz = mp->m_sb.sb_rextsize;
+        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-                resrtextents = qblocks = (count_fsb + extsz - 1);
+        if ((offset + count) > isize) {
-                do_div(resrtextents, mp->m_sb.sb_rextsize);
+                error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                                                        &last_fsb);
-                quota_flag = XFS_QMOPT_RES_RTBLKS;
+                if (error)
+                        goto error_out;
        } else {
-                resrtextents = 0;
+                if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
-                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
+                        last_fsb = MIN(last_fsb, (xfs_fileoff_t)
-                quota_flag = XFS_QMOPT_RES_REGBLKS;
+                                        ret_imap->br_blockcount +
+                                        ret_imap->br_startoff);
+        }
+        count_fsb = last_fsb - offset_fsb;
+        ASSERT(count_fsb > 0);
+        resaligned = count_fsb;
+        if (unlikely(extsz)) {
+                if ((temp = do_mod(offset_fsb, extsz)))
+                        resaligned += temp;
+                if ((temp = do_mod(resaligned, extsz)))
+                        resaligned += extsz - temp;
        }
+        if (unlikely(rt)) {
+                resrtextents = qblocks = resaligned;
+                resrtextents /= mp->m_sb.sb_rextsize;
+                resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                quota_flag = XFS_QMOPT_RES_RTBLKS;
+        } else {
+                resrtextents = 0;
+                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+                quota_flag = XFS_QMOPT_RES_REGBLKS;
+        }
        /*
         * Allocate and setup the transaction
         */
@@ -426,7 +493,6 @@ xfs_iomap_write_direct(
                        XFS_WRITE_LOG_RES(mp), resrtextents,
                        XFS_TRANS_PERM_LOG_RES,
                        XFS_WRITE_LOG_COUNT);
        /*
         * Check for running out of space, note: need lock to return
         */
@@ -436,20 +502,20 @@ xfs_iomap_write_direct(
        if (error)
                goto error_out;
-        if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
+        error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-                error = (EDQUOT);
+                                              qblocks, 0, quota_flag);
+        if (error)
                goto error1;
-        }
-        bmapi_flag = XFS_BMAPI_WRITE;
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);
-        if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+        bmapi_flag = XFS_BMAPI_WRITE;
+        if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
                bmapi_flag |= XFS_BMAPI_PREALLOC;
        /*
-         * Issue the bmapi() call to allocate the blocks
+         * Issue the xfs_bmapi() call to allocate the blocks
         */
        XFS_BMAP_INIT(&free_list, &firstfsb);
        nimaps = 1;
@@ -484,8 +550,10 @@ xfs_iomap_write_direct(
                        "extent-state : %x \n",
                        (ip->i_mount)->m_fsname,
                        (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_startblock,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_blockcount,
+                        ret_imap->br_state);
        }
        return 0;
@@ -501,6 +569,63 @@ error_out:
        return XFS_ERROR(error);
 }
+/*
+ * If the caller is doing a write at the end of the file,
+ * then extend the allocation out to the file system's write
+ * iosize.  We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ *
+ * This helper only makes the decision; it allocates nothing itself.
+ * *prealloc is set to 1 when the write is not BMAPI_SYNC, ends beyond
+ * isize, and every mapping found from the block holding the last byte
+ * of the write onwards is a hole or a delayed allocation.  Otherwise
+ * *prealloc is 0.
+ *
+ * imap/nimaps is a caller-supplied array that is overwritten by the
+ * lookups.  Returns 0, or the error from XFS_BMAPI (in which case
+ * *prealloc is 0).
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+        xfs_mount_t     *mp,
+        xfs_iocore_t    *io,
+        xfs_fsize_t     isize,
+        xfs_off_t       offset,
+        size_t          count,
+        int             ioflag,
+        xfs_bmbt_irec_t *imap,
+        int             nimaps,
+        int             *prealloc)
+{
+        xfs_fileoff_t   start_fsb;
+        xfs_filblks_t   count_fsb;
+        xfs_fsblock_t   firstblock;
+        int             n, error, imaps;
+        *prealloc = 0;
+        if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+                return 0;
+        /*
+         * If there are any real blocks past eof, then don't
+         * do any speculative allocation.
+         *
+         * NOTE(review): the loop below only makes progress if each
+         * XFS_BMAPI call returns at least one mapping - confirm.
+         */
+        start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+        while (count_fsb > 0) {
+                imaps = nimaps; /* in: array capacity, out: entries filled */
+                firstblock = NULLFSBLOCK;
+                error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+                                  0, &firstblock, 0, imap, &imaps, NULL);
+                if (error)
+                        return error;
+                for (n = 0; n < imaps; n++) {
+                        if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+                            (imap[n].br_startblock != DELAYSTARTBLOCK))
+                                return 0;       /* real blocks: *prealloc 0 */
+                        start_fsb += imap[n].br_blockcount;
+                        count_fsb -= imap[n].br_blockcount;
+                }
+        }
+        *prealloc = 1;
+        return 0;
+}
 int
 xfs_iomap_write_delay(
        xfs_inode_t     *ip,
@@ -514,13 +639,15 @@ xfs_iomap_write_delay(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-        xfs_fsize_t     isize;
+        xfs_off_t       aligned_offset;
+        xfs_fileoff_t   ioalign;
        xfs_fsblock_t   firstblock;
+        xfs_extlen_t    extsz;
+        xfs_fsize_t     isize;
        int             nimaps;
-        int             error;
        xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-        int             aeof;
+        int             prealloc, fsynced = 0;
-        int             fsynced = 0;
+        int             error;
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
@@ -528,152 +655,57 @@ xfs_iomap_write_delay(
         * Make sure that the dquots are there. This doesn't hold
         * the ilock across a disk read.
         */
        error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
        if (error)
                return XFS_ERROR(error);
+        if (XFS_IS_REALTIME_INODE(ip)) {
+                if (!(extsz = ip->i_d.di_extsize))
+                        extsz = mp->m_sb.sb_rextsize;
+        } else {
+                extsz = ip->i_d.di_extsize;
+        }
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 retry:
        isize = ip->i_d.di_size;
-        if (io->io_new_size > isize) {
+        if (io->io_new_size > isize)
                isize = io->io_new_size;
-        }
-        aeof = 0;
+        error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
-        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+                                ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
-        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+        if (error)
-        /*
+                return error;
-         * If the caller is doing a write at the end of the file,
-         * then extend the allocation (and the buffer used for the write)
-         * out to the file system's write iosize.  We clean up any extra
-         * space left over when the file is closed in xfs_inactive().
-         *
-         * For sync writes, we are flushing delayed allocate space to
-         * try to make additional space available for allocation near
-         * the filesystem full boundary - preallocation hurts in that
-         * situation, of course.
-         */
-        if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
-                xfs_off_t       aligned_offset;
-                xfs_filblks_t   count_fsb;
-                unsigned int    iosize;
-                xfs_fileoff_t   ioalign;
-                int             n;
-                xfs_fileoff_t   start_fsb;
-                /*
+        if (prealloc) {
-                 * If there are any real blocks past eof, then don't
-                 * do any speculative allocation.
-                 */
-                start_fsb = XFS_B_TO_FSBT(mp,
-                                        ((xfs_ufsize_t)(offset + count - 1)));
-                count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-                while (count_fsb > 0) {
-                        nimaps = XFS_WRITE_IMAPS;
-                        error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
-                                        0, &firstblock, 0, imap, &nimaps, NULL);
-                        if (error) {
-                                return error;
-                        }
-                        for (n = 0; n < nimaps; n++) {
-                                if ( !(io->io_flags & XFS_IOCORE_RT)  && 
-                                        !imap[n].br_startblock) {
-                                        cmn_err(CE_PANIC,"Access to block "
-                                                "zero:  fs <%s> inode: %lld "
-                                                "start_block : %llx start_off "
-                                                ": %llx blkcnt : %llx "
-                                                "extent-state : %x \n",
-                                                (ip->i_mount)->m_fsname,
-                                                (long long)ip->i_ino,
-                                                imap[n].br_startblock,
-                                                imap[n].br_startoff,
-                                                imap[n].br_blockcount,
-                                                imap[n].br_state);
-                                }
-                                if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
-                                    (imap[n].br_startblock != DELAYSTARTBLOCK)) {
-                                        goto write_map;
-                                }
-                                start_fsb += imap[n].br_blockcount;
-                                count_fsb -= imap[n].br_blockcount;
-                        }
-                }
-                iosize = mp->m_writeio_blocks;
                aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-                last_fsb = ioalign + iosize;
+                last_fsb = ioalign + mp->m_writeio_blocks;
-                aeof = 1;
+        } else {
+                last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        }
-write_map:
-        nimaps = XFS_WRITE_IMAPS;
-        firstblock = NULLFSBLOCK;
-        /*
+        if (prealloc || extsz) {
-         * If mounted with the "-o swalloc" option, roundup the allocation
+                error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
-         * request to a stripe width boundary if the file size is >=
+                                                        &last_fsb);
-         * stripe width and we are allocating past the allocation eof.
+                if (error)
-         */
-        if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 
-            && (mp->m_flags & XFS_MOUNT_SWALLOC)
-            && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
-                error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
-                        return error;
-                }
-                if (eof) {
-                        last_fsb = new_last_fsb;
-                }
-        /*
-         * Roundup the allocation request to a stripe unit (m_dalign) boundary
-         * if the file size is >= stripe unit size, and we are allocating past
-         * the allocation eof.
-         */
-        } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
-                   (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
-                error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
-                        return error;
-                }
-                if (eof) {
-                        last_fsb = new_last_fsb;
-                }
-        /*
-         * Round up the allocation request to a real-time extent boundary
-         * if the file is on the real-time subvolume.
-         */
-        } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
-                int eof;
-                xfs_fileoff_t new_last_fsb;
-                new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
-                error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
-                if (error) {
                        return error;
-                }
-                if (eof)
-                        last_fsb = new_last_fsb;
        }
+        nimaps = XFS_WRITE_IMAPS;
+        firstblock = NULLFSBLOCK;
        error = xfs_bmapi(NULL, ip, offset_fsb,
                          (xfs_filblks_t)(last_fsb - offset_fsb),
                          XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
                          XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
                          &nimaps, NULL);
-        /*
+        if (error && (error != ENOSPC))
-         * This can be EDQUOT, if nimaps == 0
-         */
-        if (error && (error != ENOSPC)) {
                return XFS_ERROR(error);
-        }
        /*
         * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-         * then we must have run out of space.
+         * then we must have run out of space - flush delalloc and retry.
         */
        if (nimaps == 0) {
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -685,17 +717,21 @@ write_map:
                goto retry;
        }
-        *ret_imap = imap[0];
-        *nmaps = 1;
-        if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
+        /*
+         * Copy the mapping out first: the block-zero check below reads
+         * through ret_imap, which holds nothing from this call until
+         * imap[0] has been stored into it.
+         */
+        *ret_imap = imap[0];
+        *nmaps = 1;
+        if (!(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
                cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
                        "start_block : %llx start_off : %llx blkcnt : %llx "
                        "extent-state : %x \n",
                        (ip->i_mount)->m_fsname,
                        (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_startblock,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_blockcount,
+                        ret_imap->br_state);
        }
        return 0;
 }
@@ -821,17 +857,21 @@ xfs_iomap_write_allocate(
                 */
                for (i = 0; i < nimaps; i++) {
-                        if ( !(io->io_flags & XFS_IOCORE_RT)  && 
+                        if (!(io->io_flags & XFS_IOCORE_RT)  &&
-                                !imap[i].br_startblock) {
+                            !imap[i].br_startblock) {
                                cmn_err(CE_PANIC,"Access to block zero:  "
                                        "fs <%s> inode: %lld "
-                                        "start_block : %llx start_off : %llx " 
+                                        "start_block : %llx start_off : %llx "
                                        "blkcnt : %llx extent-state : %x \n",
                                        (ip->i_mount)->m_fsname,
                                        (long long)ip->i_ino,
-                                        imap[i].br_startblock,
+                                        (unsigned long long)
-                                        imap[i].br_startoff,
+                                                imap[i].br_startblock,
-                                        imap[i].br_blockcount,imap[i].br_state);
+                                        (unsigned long long)
+                                                imap[i].br_startoff,
+                                        (unsigned long long)
+                                                imap[i].br_blockcount,
+                                        imap[i].br_state);
                        }
                        if ((offset_fsb >= imap[i].br_startoff) &&
                            (offset_fsb < (imap[i].br_startoff +
@@ -868,17 +908,17 @@ xfs_iomap_write_unwritten(
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_iocore_t    *io = &ip->i_iocore;
-        xfs_trans_t     *tp;
        xfs_fileoff_t   offset_fsb;
        xfs_filblks_t   count_fsb;
        xfs_filblks_t   numblks_fsb;
-        xfs_bmbt_irec_t imap;
+        xfs_fsblock_t   firstfsb;
+        int             nimaps;
+        xfs_trans_t     *tp;
+        xfs_bmbt_irec_t imap;
+        xfs_bmap_free_t free_list;
+        uint            resblks;
        int             committed;
        int             error;
-        int             nres;
-        int             nimaps;
-        xfs_fsblock_t   firstfsb;
-        xfs_bmap_free_t free_list;
        xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
                                &ip->i_iocore, offset, count);
@@ -887,9 +927,9 @@ xfs_iomap_write_unwritten(
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
-        do {
+        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
-                nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+        do {
                /*
                 * set up a transaction to convert the range of extents
                 * from unwritten to real. Do allocations in a loop until
@@ -897,7 +937,7 @@ xfs_iomap_write_unwritten(
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
-                error = xfs_trans_reserve(tp, nres,
+                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);
@@ -916,7 +956,7 @@ xfs_iomap_write_unwritten(
                XFS_BMAP_INIT(&free_list, &firstfsb);
                nimaps = 1;
                error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-                                  XFS_BMAPI_WRITE, &firstfsb,
+                                  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
                                  1, &imap, &nimaps, &free_list);
                if (error)
                        goto error_on_bmapi_transaction;
@@ -930,15 +970,17 @@ xfs_iomap_write_unwritten(
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        goto error0;
-                
                if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
                        cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
                                "inode: %lld start_block : %llx start_off : "
                                "%llx blkcnt : %llx extent-state : %x \n",
                                (ip->i_mount)->m_fsname,
                                (long long)ip->i_ino,
-                                imap.br_startblock,imap.br_startoff,
+                                (unsigned long long)imap.br_startblock,
-                                imap.br_blockcount,imap.br_state);
+                                (unsigned long long)imap.br_startoff,
+                                (unsigned long long)imap.br_blockcount,
+                                imap.br_state);
                }
                if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f63646ead816..c59450e1be40 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
 {
        xfs_dinode_core_t *dic;         /* dinode core info pointer */
        xfs_inode_t     *ip;            /* incore inode pointer */
+        vnode_t         *vp;
        int             error;
        error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
                goto out_iput;
        }
+        vp = XFS_ITOV(ip);
        dic = &ip->i_d;
        /* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
        buf->bs_size = dic->di_size;
-        buf->bs_atime.tv_sec = dic->di_atime.t_sec;
+        vn_atime_to_bstime(vp, &buf->bs_atime);
-        buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
        buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
        buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
        buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 29af51275ca9..9176995160ed 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
 #define xlog_trace_iclog(iclog,state)
 #endif /* XFS_LOG_TRACE */
+/*
+ * Link @tic into the circular, doubly linked ticket queue whose head pointer
+ * is *@qp.  On a non-empty queue the ticket is placed immediately before the
+ * current head (i.e. at the tail) and the head is left alone; on an empty
+ * queue the ticket becomes the sole member and the new head.  The ticket is
+ * flagged XLOG_TIC_IN_Q in both cases.
+ */
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+        struct xlog_ticket      *head = *qp;
+        if (!head) {
+                /* empty queue: a one-element ring that points at itself */
+                tic->t_next = tic;
+                tic->t_prev = tic;
+                *qp = tic;
+        } else {
+                struct xlog_ticket      *tail = head->t_prev;
+                /* splice between the old tail and the head */
+                tic->t_next = head;
+                tic->t_prev = tail;
+                tail->t_next = tic;
+                head->t_prev = tic;
+        }
+        tic->t_flags |= XLOG_TIC_IN_Q;
+}
+/*
+ * Unlink @tic from the circular ticket queue headed by *@qp, clear its link
+ * pointers and drop its XLOG_TIC_IN_Q flag.  Removing the only member leaves
+ * the queue empty (*@qp == NULL); otherwise the head pointer is moved to
+ * @tic's successor, whether or not @tic was the head.
+ */
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+        struct xlog_ticket      *next = tic->t_next;
+        if (next == tic) {
+                /* ticket was alone on the queue */
+                *qp = NULL;
+        } else {
+                struct xlog_ticket      *prev = tic->t_prev;
+                *qp = next;
+                next->t_prev = prev;
+                prev->t_next = next;
+        }
+        tic->t_prev = NULL;
+        tic->t_next = NULL;
+        tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+/*
+ * Take @bytes back off both the write and the reserve grant heads.  Each head
+ * is kept as a (cycle, bytes) pair: when a byte count drops below zero it is
+ * wrapped by adding the log size and the matching cycle is stepped back one.
+ */
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+        /* write grant head */
+        log->l_grant_write_bytes -= bytes;
+        if (log->l_grant_write_bytes < 0) {
+                log->l_grant_write_cycle--;
+                log->l_grant_write_bytes += log->l_logsize;
+        }
+        /* reserve grant head */
+        log->l_grant_reserve_bytes -= bytes;
+        if (log->l_grant_reserve_bytes < 0) {
+                log->l_grant_reserve_cycle--;
+                log->l_grant_reserve_bytes += log->l_logsize;
+        }
+}
+/*
+ * Advance the write grant head by @bytes.  Once the byte count exceeds the
+ * log size (strictly greater than) it is wrapped by subtracting the log size
+ * and the write cycle is bumped.
+ */
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+        log->l_grant_write_bytes += bytes;
+        if (log->l_grant_write_bytes <= log->l_logsize)
+                return;
+        /* ran past the end of the log: wrap into the next cycle */
+        log->l_grant_write_cycle++;
+        log->l_grant_write_bytes -= log->l_logsize;
+}
+/*
+ * Advance the reserve grant head by @bytes.  Once the byte count exceeds the
+ * log size (strictly greater than) it is wrapped by subtracting the log size
+ * and the reserve cycle is bumped.
+ */
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+        log->l_grant_reserve_bytes += bytes;
+        if (log->l_grant_reserve_bytes <= log->l_logsize)
+                return;
+        /* ran past the end of the log: wrap into the next cycle */
+        log->l_grant_reserve_cycle++;
+        log->l_grant_reserve_bytes -= log->l_logsize;
+}
+/*
+ * Advance both grant heads by @bytes: the write head first, then the
+ * reserve head, each wrapping independently via its own helper.
+ */
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+        xlog_grant_add_space_write(log, bytes);
+        xlog_grant_add_space_reserve(log, bytes);
+}
 /*
 * NOTES:
 *
@@ -326,7 +403,7 @@ xfs_log_release_iclog(xfs_mount_t *mp,
        if (xlog_state_release_iclog(log, iclog)) {
                xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
-                return(EIO);
+                return EIO;
        }
        return 0;
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t	*mp,
                if (readonly)
                        vfsp->vfs_flag &= ~VFS_RDONLY;
-                error = xlog_recover(mp->m_log, readonly);
+                error = xlog_recover(mp->m_log);
                if (readonly)
                        vfsp->vfs_flag |= VFS_RDONLY;
@@ -479,7 +556,7 @@ xfs_log_unmount(xfs_mount_t *mp)
        error = xfs_log_unmount_write(mp);
        xfs_log_unmount_dealloc(mp);
-        return (error);
+        return error;
 }
 /*
@@ -651,7 +728,7 @@ xfs_log_write(xfs_mount_t *	mp,
        if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
                xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
        }
-        return (error);
+        return error;
 }       /* xfs_log_write */
@@ -759,7 +836,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
                needed = 1;
        }
        LOG_UNLOCK(log, s);
-        return(needed);
+        return needed;
 }
 /******************************************************************************
@@ -926,7 +1003,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
        XFS_BUF_ERROR(bp, EIO);
        XFS_BUF_STALE(bp);
        xfs_biodone(bp);
-        return (XFS_ERROR(EIO));
+        return XFS_ERROR(EIO);
 }
@@ -1186,7 +1263,7 @@ xlog_commit_record(xfs_mount_t  *mp,
                               iclog, XLOG_COMMIT_TRANS))) {
                xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
        }
-        return (error);
+        return error;
 }       /* xlog_commit_record */
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t		*log,
        /* move grant heads by roundoff in sync */
        s = GRANT_LOCK(log);
-        XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
+        xlog_grant_add_space(log, roundoff);
-        XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
        GRANT_UNLOCK(log, s);
        /* put cycle number in every block */
@@ -1384,7 +1460,7 @@ xlog_sync(xlog_t		*log,
        if ((error = XFS_bwrite(bp))) {
                xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
                                  XFS_BUF_ADDR(bp));
-                return (error);
+                return error;
        }
        if (split) {
                bp              = iclog->ic_log->l_xbuf;
@@ -1422,10 +1498,10 @@ xlog_sync(xlog_t		*log,
                if ((error = XFS_bwrite(bp))) {
                        xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
                                          bp, XFS_BUF_ADDR(bp));
-                        return (error);
+                        return error;
                }
        }
-        return (0);
+        return 0;
 }       /* xlog_sync */
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t		*log,
 * print out info relating to regions written which consume
 * the reservation
 */
-#if defined(XFS_LOG_RES_DEBUG)
 STATIC void
 xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 {
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
                        ticket->t_res_arr_sum, ticket->t_res_o_flow,
                        ticket->t_res_num_ophdrs, ophdr_spc,
                        ticket->t_res_arr_sum + 
-                          ticket->t_res_o_flow + ophdr_spc,
+                        ticket->t_res_o_flow + ophdr_spc,
                        ticket->t_res_num);
        for (i = 0; i < ticket->t_res_num; i++) {
-                uint r_type = ticket->t_res_arr[i].r_type; 
+                uint r_type = ticket->t_res_arr[i].r_type; 
                cmn_err(CE_WARN,
                            "region[%u]: %s - %u bytes\n",
                            i, 
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
                            ticket->t_res_arr[i].r_len);
        }
 }
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif
 /*
 * Write some region out to in-core log
@@ -1726,7 +1798,7 @@ xlog_write(xfs_mount_t *	mp,
    for (index = 0; index < nentries; ) {
        if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
                                               &contwr, &log_offset)))
-                return (error);
+                return error;
        ASSERT(log_offset <= iclog->ic_size - 1);
        ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
@@ -1831,7 +1903,7 @@ xlog_write(xfs_mount_t *	mp,
                    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
                    record_cnt = data_cnt = 0;
                    if ((error = xlog_state_release_iclog(log, iclog)))
-                            return (error);
+                            return error;
                    break;                      /* don't increment index */
            } else {                            /* copied entire region */
                index++;
@@ -1845,7 +1917,7 @@ xlog_write(xfs_mount_t *	mp,
                        ASSERT(flags & XLOG_COMMIT_TRANS);
                        *commit_iclog = iclog;
                    } else if ((error = xlog_state_release_iclog(log, iclog)))
-                           return (error);
+                           return error;
                    if (index == nentries)
                            return 0;           /* we are done */
                    else
@@ -1862,7 +1934,7 @@ xlog_write(xfs_mount_t *	mp,
        *commit_iclog = iclog;
        return 0;
    }
-    return (xlog_state_release_iclog(log, iclog));
+    return xlog_state_release_iclog(log, iclog);
 }       /* xlog_write */
@@ -1978,7 +2050,7 @@ xlog_get_lowest_lsn(
            }
            lsn_log = lsn_log->ic_next;
        } while (lsn_log != log->l_iclog);
-        return(lowest_lsn);
+        return lowest_lsn;
 }
@@ -2330,7 +2402,7 @@ restart:
                if (iclog->ic_refcnt == 1) {
                        LOG_UNLOCK(log, s);
                        if ((error = xlog_state_release_iclog(log, iclog)))
-                                return (error);
+                                return error;
                } else {
                        iclog->ic_refcnt--;
                        LOG_UNLOCK(log, s);
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t	   *log,
        /* something is already sleeping; insert new transaction at end */
        if (log->l_reserve_headq) {
-                XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+                xlog_ins_ticketq(&log->l_reserve_headq, tic);
                xlog_trace_loggrant(log, tic,
                                    "xlog_grant_log_space: sleep 1");
                /*
@@ -2422,7 +2494,7 @@ redo:
                                     log->l_grant_reserve_bytes);
        if (free_bytes < need_bytes) {
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                        XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+                        xlog_ins_ticketq(&log->l_reserve_headq, tic);
                xlog_trace_loggrant(log, tic,
                                    "xlog_grant_log_space: sleep 2");
                XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
                s = GRANT_LOCK(log);
                goto redo;
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_reserve_headq, tic);
        /* we've got enough space */
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
+        xlog_grant_add_space(log, need_bytes);
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
 #ifdef DEBUG
        tail_lsn = log->l_tail_lsn;
        /*
@@ -2464,7 +2535,7 @@ redo:
 error_return:
        if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_reserve_headq, tic);
        xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
        /*
         * If we are failing, make sure the ticket doesn't have any
@@ -2498,7 +2569,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,
        XLOG_TIC_RESET_RES(tic);
        if (tic->t_cnt > 0)
-                return (0);
+                return 0;
 #ifdef DEBUG
        if (log->l_flags & XLOG_ACTIVE_RECOVERY)
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,
                if (ntic != log->l_write_headq) {
                        if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                                XLOG_INS_TICKETQ(log->l_write_headq, tic);
+                                xlog_ins_ticketq(&log->l_write_headq, tic);
                        xlog_trace_loggrant(log, tic,
                                    "xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
                                     log->l_grant_write_bytes);
        if (free_bytes < need_bytes) {
                if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-                        XLOG_INS_TICKETQ(log->l_write_headq, tic);
+                        xlog_ins_ticketq(&log->l_write_headq, tic);
                XFS_STATS_INC(xs_sleep_logspace);
                sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
@@ -2581,9 +2652,10 @@ redo:
                s = GRANT_LOCK(log);
                goto redo;
        } else if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+                xlog_del_ticketq(&log->l_write_headq, tic);
-        XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+        /* we've got enough space */
+        xlog_grant_add_space_write(log, need_bytes);
 #ifdef DEBUG
        tail_lsn = log->l_tail_lsn;
        if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2595,12 +2667,12 @@ redo:
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit");
        xlog_verify_grant_head(log, 1);
        GRANT_UNLOCK(log, s);
-        return (0);
+        return 0;
 error_return:
+        /*
+         * Tickets sleeping in this function are queued on l_write_headq,
+         * so that is the queue a failing ticket must be unlinked from.
+         */
        if (tic->t_flags & XLOG_TIC_IN_Q)
-                XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+                xlog_del_ticketq(&log->l_write_headq, tic);
        xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
        /*
         * If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
                ticket->t_cnt--;
        s = GRANT_LOCK(log);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
+        xlog_grant_sub_space(log, ticket->t_curr_res);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
        ticket->t_curr_res = ticket->t_unit_res;
        XLOG_TIC_RESET_RES(ticket);
        xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
                return;
        }
-        XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+        xlog_grant_add_space_reserve(log, ticket->t_unit_res);
        xlog_trace_loggrant(log, ticket,
                            "xlog_regrant_reserve_log_space: exit");
        xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
        s = GRANT_LOCK(log);
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
+        xlog_grant_sub_space(log, ticket->t_curr_res);
-        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
         */
        if (ticket->t_cnt > 0) {
                ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
-                XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
+                xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
-                XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
        }
        xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
@@ -2768,7 +2837,7 @@ xlog_state_release_iclog(xlog_t		*log,
        if (sync) {
                return xlog_sync(log, iclog);
        }
-        return (0);
+        return 0;
 }       /* xlog_state_release_iclog */
@@ -3058,7 +3127,7 @@ try_again:
    } while (iclog != log->l_iclog);
    LOG_UNLOCK(log, s);
-    return (0);
+    return 0;
 }       /* xlog_state_sync */
@@ -3476,12 +3545,12 @@ xlog_state_ioerror(
                        ic->ic_state = XLOG_STATE_IOERROR;
                        ic = ic->ic_next;
                } while (ic != iclog);
-                return (0);
+                return 0;
        }
        /*
         * Return non-zero, if state transition has already happened.
         */
-        return (1);
+        return 1;
 }
 /*
@@ -3518,7 +3587,7 @@ xfs_log_force_umount(
            log->l_flags & XLOG_ACTIVE_RECOVERY) {
                mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
                XFS_BUF_DONE(mp->m_sb_bp);
-                return (0);
+                return 0;
        }
        /*
@@ -3527,7 +3596,7 @@ xfs_log_force_umount(
         */
        if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
                ASSERT(XLOG_FORCED_SHUTDOWN(log));
-                return (1);
+                return 1;
        }
        retval = 0;
        /*
@@ -3609,7 +3678,7 @@ xfs_log_force_umount(
        }
 #endif
        /* return non-zero if log IOERROR transition had already happened */
-        return (retval);
+        return retval;
 }
 STATIC int
@@ -3623,8 +3692,8 @@ xlog_iclogs_empty(xlog_t *log)
                 * any language.
                 */
                if (iclog->ic_header.h_num_logops)
-                        return(0);
+                        return 0;
                iclog = iclog->ic_next;
        } while (iclog != log->l_iclog);
-        return(1);
+        return 1;
 }
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 158829ca56f6..4b2ac88dbb83 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -30,13 +30,7 @@
 * By comparing each compnent, we don't have to worry about extra
 * endian issues in treating two 32 bit numbers as one 64 bit number
 */
-static
+static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
-#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96))
-__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */
-#else
-__inline__
-#endif
-xfs_lsn_t       _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 {
        if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
                return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
@@ -102,7 +96,6 @@ xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 /* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_REG_TYPE_BFORMAT           1
 #define XLOG_REG_TYPE_BCHUNK            2
 #define XLOG_REG_TYPE_EFI_FORMAT        3
@@ -123,21 +116,13 @@ xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XLOG_REG_TYPE_COMMIT            18
 #define XLOG_REG_TYPE_TRANSHDR          19
 #define XLOG_REG_TYPE_MAX               19
-#endif
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
 typedef struct xfs_log_iovec {
        xfs_caddr_t             i_addr;         /* beginning address of region */
        int             i_len;          /* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
+        uint            i_type;         /* type of region */
-        uint            i_type;         /* type of region */
-#endif
 } xfs_log_iovec_t;
 typedef void* xfs_log_ticket_t;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4518b188ade6..34bcbf50789c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
 /* Ticket reservation region accounting */ 
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_TIC_LEN_MAX        15
 #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
                                (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
 * we don't care about.
 */
 typedef struct xlog_res {
-        uint    r_len;
+        uint    r_len;  /* region length                :4 */
-        uint    r_type;
+        uint    r_type; /* region's transaction type    :4 */
 } xlog_res_t;
-#else
-#define XLOG_TIC_RESET_RES(t)
-#define XLOG_TIC_ADD_OPHDR(t)
-#define XLOG_TIC_ADD_REGION(t, len, type)
-#endif
 typedef struct xlog_ticket {
        sv_t               t_sema;       /* sleep on this semaphore      : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
        char               t_flags;      /* properties of reservation    : 1  */
        uint               t_trans_type; /* transaction type             : 4  */
-#if defined (XFS_LOG_RES_DEBUG)
        /* reservation array fields */
        uint               t_res_num;                    /* num in array : 4 */
-        xlog_res_t         t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : X */ 
        uint               t_res_num_ophdrs;             /* num op hdrs  : 4 */
        uint               t_res_arr_sum;                /* array sum    : 4 */
        uint               t_res_o_flow;                 /* sum overflow : 4 */
-#endif
+        xlog_res_t         t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */
 } xlog_ticket_t;
 #endif
@@ -494,71 +485,13 @@ typedef struct log {
 #define XLOG_FORCED_SHUTDOWN(log)       ((log)->l_flags & XLOG_IO_ERROR)
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type)                            \
-    {                                                                   \
-        if (type == 'w') {                                              \
-                (log)->l_grant_write_bytes -= (bytes);                  \
-                if ((log)->l_grant_write_bytes < 0) {                   \
-                        (log)->l_grant_write_bytes += (log)->l_logsize; \
-                        (log)->l_grant_write_cycle--;                   \
-                }                                                       \
-        } else {                                                        \
-                (log)->l_grant_reserve_bytes -= (bytes);                \
-                if ((log)->l_grant_reserve_bytes < 0) {                 \
-                        (log)->l_grant_reserve_bytes += (log)->l_logsize;\
-                        (log)->l_grant_reserve_cycle--;                 \
-                }                                                       \
-         }                                                              \
-    }
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type)                            \
-    {                                                                   \
-        if (type == 'w') {                                              \
-                (log)->l_grant_write_bytes += (bytes);                  \
-                if ((log)->l_grant_write_bytes > (log)->l_logsize) {    \
-                        (log)->l_grant_write_bytes -= (log)->l_logsize; \
-                        (log)->l_grant_write_cycle++;                   \
-                }                                                       \
-        } else {                                                        \
-                (log)->l_grant_reserve_bytes += (bytes);                \
-                if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {  \
-                        (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
-                        (log)->l_grant_reserve_cycle++;                 \
-                }                                                       \
-         }                                                              \
-    }
-#define XLOG_INS_TICKETQ(q, tic)                        \
-    {                                                   \
-        if (q) {                                        \
-                (tic)->t_next       = (q);              \
-                (tic)->t_prev       = (q)->t_prev;      \
-                (q)->t_prev->t_next = (tic);            \
-                (q)->t_prev         = (tic);            \
-        } else {                                        \
-                (tic)->t_prev = (tic)->t_next = (tic);  \
-                (q) = (tic);                            \
-        }                                               \
-        (tic)->t_flags |= XLOG_TIC_IN_Q;                \
-    }
-#define XLOG_DEL_TICKETQ(q, tic)                        \
-    {                                                   \
-        if ((tic) == (tic)->t_next) {                   \
-                (q) = NULL;                             \
-        } else {                                        \
-                (q) = (tic)->t_next;                    \
-                (tic)->t_next->t_prev = (tic)->t_prev;  \
-                (tic)->t_prev->t_next = (tic)->t_next;  \
-        }                                               \
-        (tic)->t_next = (tic)->t_prev = NULL;           \
-        (tic)->t_flags &= ~XLOG_TIC_IN_Q;               \
-    }
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int       xlog_find_tail(xlog_t  *log,
                                xfs_daddr_t *head_blk,
-                                xfs_daddr_t *tail_blk,
+                                xfs_daddr_t *tail_blk);
-                                int readonly);
+extern int       xlog_recover(xlog_t *log);
-extern int       xlog_recover(xlog_t *log, int readonly);
 extern int       xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void      xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern void      xlog_recover_process_iunlinks(xlog_t *log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ab7df768063..7d46cbd6a07a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -783,8 +783,7 @@ int
 xlog_find_tail(
        xlog_t                  *log,
        xfs_daddr_t             *head_blk,
-        xfs_daddr_t             *tail_blk,
+        xfs_daddr_t             *tail_blk)
-        int                     readonly)
 {
        xlog_rec_header_t       *rhead;
        xlog_op_header_t        *op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
        /*
         * The logitem format's flag tells us if this was user quotaoff,
-         * group quotaoff or both.
+         * group/project quotaoff or both.
         */
        if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
                log->l_quotaoffs_flag |= XFS_DQ_USER;
+        if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+                log->l_quotaoffs_flag |= XFS_DQ_PROJ;
        if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
                log->l_quotaoffs_flag |= XFS_DQ_GROUP;
@@ -3890,14 +3891,13 @@ xlog_do_recover(
 */
 int
 xlog_recover(
-        xlog_t          *log,
+        xlog_t          *log)
-        int             readonly)
 {
        xfs_daddr_t     head_blk, tail_blk;
        int             error;
        /* find the tail of the log */
-        if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+        if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
                return error;
        if (tail_blk != head_blk) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 541d5dd474be..62188ea392c7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,7 @@ STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void     xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void     xfs_unmountfs_wait(xfs_mount_t *);
-static struct {
+static const struct {
    short offset;
    short type;     /* 0 = integer
                * 1 = binary / string (no translation)
@@ -117,7 +117,7 @@ xfs_mount_init(void)
        AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
        spinlock_init(&mp->m_sb_lock, "xfs_sb");
-        mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock");
+        mutex_init(&mp->m_ilock);
        initnsema(&mp->m_growlock, 1, "xfs_grow");
        /*
         * Initialize the AIL.
@@ -646,7 +646,7 @@ xfs_mountfs(
        if (mp->m_sb_bp == NULL) {
                if ((error = xfs_readsb(mp))) {
-                        return (error);
+                        return error;
                }
        }
        xfs_mount_common(mp, sbp);
@@ -889,7 +889,7 @@ xfs_mountfs(
         * For client case we are done now
         */
        if (mfsi_flags & XFS_MFSI_CLIENT) {
-                return(0);
+                return 0;
        }
        /*
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
        xfs_iflush_all(mp);
-        XFS_QM_DQPURGEALL(mp,
+        XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
-                XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
        /*
         * Flush out the log synchronously so that we know for sure
@@ -1183,7 +1182,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                        xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
        }
        xfs_buf_relse(sbp);
-        return (error);
+        return error;
 }
 /*
@@ -1258,19 +1257,19 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
                lcounter += delta;
                if (lcounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_icount = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_IFREE:
                lcounter = (long long)mp->m_sb.sb_ifree;
                lcounter += delta;
                if (lcounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_ifree = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_FDBLOCKS:
                lcounter = (long long)mp->m_sb.sb_fdblocks;
@@ -1297,101 +1296,101 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
                                if (rsvd) {
                                        lcounter = (long long)mp->m_resblks_avail + delta;
                                        if (lcounter < 0) {
-                                                return (XFS_ERROR(ENOSPC));
+                                                return XFS_ERROR(ENOSPC);
                                        }
                                        mp->m_resblks_avail = lcounter;
-                                        return (0);
+                                        return 0;
                                } else {        /* not reserved */
-                                        return (XFS_ERROR(ENOSPC));
+                                        return XFS_ERROR(ENOSPC);
                                }
                        }
                }
                mp->m_sb.sb_fdblocks = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_FREXTENTS:
                lcounter = (long long)mp->m_sb.sb_frextents;
                lcounter += delta;
                if (lcounter < 0) {
-                        return (XFS_ERROR(ENOSPC));
+                        return XFS_ERROR(ENOSPC);
                }
                mp->m_sb.sb_frextents = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_DBLOCKS:
                lcounter = (long long)mp->m_sb.sb_dblocks;
                lcounter += delta;
                if (lcounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_dblocks = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_AGCOUNT:
                scounter = mp->m_sb.sb_agcount;
                scounter += delta;
                if (scounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_agcount = scounter;
-                return (0);
+                return 0;
        case XFS_SBS_IMAX_PCT:
                scounter = mp->m_sb.sb_imax_pct;
                scounter += delta;
                if (scounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_imax_pct = scounter;
-                return (0);
+                return 0;
        case XFS_SBS_REXTSIZE:
                scounter = mp->m_sb.sb_rextsize;
                scounter += delta;
                if (scounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_rextsize = scounter;
-                return (0);
+                return 0;
        case XFS_SBS_RBMBLOCKS:
                scounter = mp->m_sb.sb_rbmblocks;
                scounter += delta;
                if (scounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_rbmblocks = scounter;
-                return (0);
+                return 0;
        case XFS_SBS_RBLOCKS:
                lcounter = (long long)mp->m_sb.sb_rblocks;
                lcounter += delta;
                if (lcounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_rblocks = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_REXTENTS:
                lcounter = (long long)mp->m_sb.sb_rextents;
                lcounter += delta;
                if (lcounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_rextents = lcounter;
-                return (0);
+                return 0;
        case XFS_SBS_REXTSLOG:
                scounter = mp->m_sb.sb_rextslog;
                scounter += delta;
                if (scounter < 0) {
                        ASSERT(0);
-                        return (XFS_ERROR(EINVAL));
+                        return XFS_ERROR(EINVAL);
                }
                mp->m_sb.sb_rextslog = scounter;
-                return (0);
+                return 0;
        default:
                ASSERT(0);
-                return (XFS_ERROR(EINVAL));
+                return XFS_ERROR(EINVAL);
        }
 }
@@ -1410,7 +1409,7 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
        s = XFS_SB_LOCK(mp);
        status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
        XFS_SB_UNLOCK(mp, s);
-        return (status);
+        return status;
 }
 /*
@@ -1471,7 +1470,7 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
                }
        }
        XFS_SB_UNLOCK(mp, s);
-        return (status);
+        return status;
 }
 /*
@@ -1501,7 +1500,7 @@ xfs_getsb(
        }
        XFS_BUF_HOLD(bp);
        ASSERT(XFS_BUF_ISDONE(bp));
-        return (bp);
+        return bp;
 }
 /*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 08b2e0a5d807..cd3cf9613a00 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -308,7 +308,6 @@ typedef struct xfs_mount {
        xfs_buftarg_t           *m_ddev_targp;  /* saves taking the address */
        xfs_buftarg_t           *m_logdev_targp;/* ptr to log device */
        xfs_buftarg_t           *m_rtdev_targp; /* ptr to rt device */
-#define m_dev           m_ddev_targp->pbr_dev
        __uint8_t               m_dircook_elog; /* log d-cookie entry bits */
        __uint8_t               m_blkbit_log;   /* blocklog + NBBY */
        __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
                                                   user */
 #define XFS_MOUNT_NOALIGN       (1ULL << 7)     /* turn off stripe alignment
                                                   allocations */
-#define XFS_MOUNT_COMPAT_ATTR   (1ULL << 8)     /* do not use attr2 format */
+#define XFS_MOUNT_ATTR2         (1ULL << 8)     /* allow use of attr2 format */
                             /* (1ULL << 9)     -- currently unused */
 #define XFS_MOUNT_NORECOVERY    (1ULL << 10)    /* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED        (1ULL << 11)    /* shared mount */
@@ -533,7 +532,7 @@ typedef struct xfs_mod_sb {
        int             msb_delta;      /* Change to make to specified field */
 } xfs_mod_sb_t;
-#define XFS_MOUNT_ILOCK(mp)     mutex_lock(&((mp)->m_ilock), PINOD)
+#define XFS_MOUNT_ILOCK(mp)     mutex_lock(&((mp)->m_ilock))
 #define XFS_MOUNT_IUNLOCK(mp)   mutex_unlock(&((mp)->m_ilock))
 #define XFS_SB_LOCK(mp)         mutex_spinlock(&(mp)->m_sb_lock)
 #define XFS_SB_UNLOCK(mp,s)     mutex_spinunlock(&(mp)->m_sb_lock,(s))
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4d4e8f4e768e..81a05cfd77d2 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -243,7 +243,6 @@ xfs_rename(
        xfs_inode_t     *inodes[4];
        int             target_ip_dropped = 0;  /* dropped target_ip link? */
        vnode_t         *src_dir_vp;
-        bhv_desc_t      *target_dir_bdp;
        int             spaceres;
        int             target_link_zero = 0;
        int             num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
         * Find the XFS behavior descriptor for the target directory
         * vnode since it was not handed to us.
         */
-        target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
+        target_dp = xfs_vtoi(target_dir_vp);
-                                                &xfs_vnodeops);
+        if (target_dp == NULL) {
-        if (target_dir_bdp == NULL) {
                return XFS_ERROR(EXDEV);
        }
        src_dp = XFS_BHVTOI(src_dir_bdp);
-        target_dp = XFS_BHVTOI(target_dir_bdp);
        mp = src_dp->i_mount;
        if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index c4b20872f07d..a59c102cf214 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -238,6 +238,7 @@ xfs_bioerror_relse(
        }
        return (EIO);
 }
 /*
 * Prints out an ALERT message about I/O error.
 */
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
 "       (\"%s\") error %d buf count %zd",
                (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
-                XFS_BUFTARG_NAME(bp->pb_target),
+                XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
-                (__uint64_t)blkno,
+                (__uint64_t)blkno, func,
-                func,
+                XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
-                XFS_BUF_GETERROR(bp),
-                XFS_BUF_COUNT(bp));
 }
 /*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 4a17d335f897..bf168a91ddb8 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -68,18 +68,6 @@ struct xfs_mount;
        (XFS_SB_VERSION_NUMBITS | \
         XFS_SB_VERSION_OKREALFBITS | \
         XFS_SB_VERSION_OKSASHFBITS)
-#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits)     \
-        (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
-          (morebits)) ? \
-                (XFS_SB_VERSION_4 | \
-                 ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
-                 ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
-                 ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
-                 ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
-                 ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
-                 ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
-                 ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
-                XFS_SB_VERSION_1)
 /*
 * There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
        (XFS_SB_VERSION2_OKREALFBITS |  \
         XFS_SB_VERSION2_OKSASHFBITS )
-/*
- * mkfs macro to set up sb_features2 word
- */
-#define XFS_SB_VERSION2_MKFS(resvd1, sbcntr)    0
 typedef struct xfs_sb
 {
        __uint32_t      sb_magicnum;    /* magic number == XFS_SB_MAGIC */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 279e043d7323..d3d714e6b32a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
        xfs_log_item_t          *lip;
        int                     i;
 #endif
+        xfs_mount_t             *mp = tp->t_mountp;
        /*
         * See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
         * filesystem.  This happens in paths where we detect
         * corruption and decide to give up.
         */
-        if ((tp->t_flags & XFS_TRANS_DIRTY) &&
+        if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
-            !XFS_FORCED_SHUTDOWN(tp->t_mountp))
+                XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
-                xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+                xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+        }
 #ifdef DEBUG
        if (!(flags & XFS_TRANS_ABORT)) {
                licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
                                }
                                lip = lidp->lid_item;
-                                if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+                                if (!XFS_FORCED_SHUTDOWN(mp))
                                        ASSERT(!(lip->li_type == XFS_LI_EFD));
                        }
                        licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
        }
 #endif
        xfs_trans_unreserve_and_mod_sb(tp);
-        XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+        XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
        if (tp->t_ticket) {
                if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
                } else {
                        log_flags = 0;
                }
-                xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+                xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
        }
        /* mark this thread as no longer being in a transaction */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a889963fdd14..d77901c07f63 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -973,7 +973,6 @@ void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
-void            xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void            xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void            xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 486147ef0e3d..1117d600d741 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -78,7 +78,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
                lidp->lid_size = 0;
                lip->li_desc = lidp;
                lip->li_mountp = tp->t_mountp;
-                return (lidp);
+                return lidp;
        }
        /*
@@ -119,7 +119,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
        lidp->lid_size = 0;
        lip->li_desc = lidp;
        lip->li_mountp = tp->t_mountp;
-        return (lidp);
+        return lidp;
 }
 /*
@@ -180,7 +180,7 @@ xfs_trans_find_item(xfs_trans_t	*tp, xfs_log_item_t *lip)
 {
        ASSERT(lip->li_desc != NULL);
-        return (lip->li_desc);
+        return lip->li_desc;
 }
@@ -219,10 +219,10 @@ xfs_trans_first_item(xfs_trans_t *tp)
                        continue;
                }
-                return (XFS_LIC_SLOT(licp, i));
+                return XFS_LIC_SLOT(licp, i);
        }
        cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
-        return(NULL);
+        return NULL;
 }
@@ -252,7 +252,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
                        continue;
                }
-                return (XFS_LIC_SLOT(licp, i));
+                return XFS_LIC_SLOT(licp, i);
        }
        /*
@@ -261,7 +261,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
         * If there is no next chunk, return NULL.
         */
        if (licp->lic_next == NULL) {
-                return (NULL);
+                return NULL;
        }
        licp = licp->lic_next;
@@ -271,7 +271,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
                        continue;
                }
-                return (XFS_LIC_SLOT(licp, i));
+                return XFS_LIC_SLOT(licp, i);
        }
        ASSERT(0);
        /* NOTREACHED */
@@ -425,7 +425,7 @@ xfs_trans_unlock_chunk(
                }
        }
-        return (freed);
+        return freed;
 }
@@ -478,7 +478,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
                 */
                lbsp->lbc_ag = ag;
                lbsp->lbc_idx = idx;
-                return (lbsp);
+                return lbsp;
        }
        /*
@@ -512,7 +512,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
        tp->t_busy_free--;
        lbsp->lbc_ag = ag;
        lbsp->lbc_idx = idx;
-        return (lbsp);
+        return lbsp;
 }
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fefe1d60377f..34654ec6ae10 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -55,16 +55,13 @@ xfs_get_dir_entry(
        xfs_inode_t     **ipp)
 {
        vnode_t         *vp;
-        bhv_desc_t      *bdp;
        vp = VNAME_TO_VNODE(dentry);
-        bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
-        if (!bdp) {
+        *ipp = xfs_vtoi(vp);
-                *ipp = NULL;
+        if (!*ipp)
                return XFS_ERROR(ENOENT);
-        }
        VN_HOLD(vp);
-        *ipp = XFS_BHVTOI(bdp);
        return 0;
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7bdbd991ab1c..b6ad370fab3d 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -53,6 +53,7 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_fsops.h"
 STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -290,8 +291,8 @@ xfs_start_flags(
                mp->m_flags |= XFS_MOUNT_IDELETE;
        if (ap->flags & XFSMNT_DIRSYNC)
                mp->m_flags |= XFS_MOUNT_DIRSYNC;
-        if (ap->flags & XFSMNT_COMPAT_ATTR)
+        if (ap->flags & XFSMNT_ATTR2)
-                mp->m_flags |= XFS_MOUNT_COMPAT_ATTR;
+                mp->m_flags |= XFS_MOUNT_ATTR2;
        if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
                mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
                mp->m_flags |= XFS_MOUNT_NOUUID;
        if (ap->flags & XFSMNT_BARRIER)
                mp->m_flags |= XFS_MOUNT_BARRIER;
+        else
+                mp->m_flags &= ~XFS_MOUNT_BARRIER;
        return 0;
 }
@@ -330,10 +333,11 @@ xfs_finish_flags(
        /* Fail a mount where the logbuf is smaller then the log stripe */
        if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
-                if ((ap->logbufsize == -1) &&
+                if ((ap->logbufsize <= 0) &&
                    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
-                } else if (ap->logbufsize < mp->m_sb.sb_logsunit) {
+                } else if (ap->logbufsize > 0 &&
+                           ap->logbufsize < mp->m_sb.sb_logsunit) {
                        cmn_err(CE_WARN,
        "XFS: logbuf size must be greater than or equal to log stripe size");
                        return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
                }
        }
+        if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+                mp->m_flags |= XFS_MOUNT_ATTR2;
+        }
        /*
         * prohibit r/w mounts of read-only filesystems
         */
@@ -382,10 +390,6 @@ xfs_finish_flags(
                        return XFS_ERROR(EINVAL);
        }
-        if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
-                mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
-        }
        return 0;
 }
@@ -504,13 +508,13 @@ xfs_mount(
        if (error)
                goto error2;
+        if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
+                xfs_mountfs_check_barriers(mp);
        error = XFS_IOINIT(vfsp, args, flags);
        if (error)
                goto error2;
-        if ((args->flags & XFSMNT_BARRIER) &&
-            !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
-                xfs_mountfs_check_barriers(mp);
        return 0;
 error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
        else
                mp->m_flags &= ~XFS_MOUNT_NOATIME;
+        if (args->flags & XFSMNT_BARRIER)
+                mp->m_flags |= XFS_MOUNT_BARRIER;
+        else
+                mp->m_flags &= ~XFS_MOUNT_BARRIER;
        if ((vfsp->vfs_flag & VFS_RDONLY) &&
            !(*flags & MS_RDONLY)) {
                vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
 #define MNTOPT_BARRIER  "barrier"       /* use writer barriers for log write and
                                         * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"    /* .. disable */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_64BITINODE   "inode64"   /* inodes can be allocated anywhere */
 #define MNTOPT_IKEEP    "ikeep"         /* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
        int                     iosize;
        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-        args->flags |= XFSMNT_COMPAT_ATTR;
 #if 0   /* XXX: off by default, until some remaining issues ironed out */
        args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
                        args->flags |= XFSMNT_NOUUID;
                } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
                        args->flags |= XFSMNT_BARRIER;
+                } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                        args->flags &= ~XFSMNT_BARRIER;
                } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
                        args->flags &= ~XFSMNT_IDELETE;
                } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
                } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
                        args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
                } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                        args->flags &= ~XFSMNT_COMPAT_ATTR;
+                        args->flags |= XFSMNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                        args->flags |= XFSMNT_COMPAT_ATTR;
+                        args->flags &= ~XFSMNT_ATTR2;
                } else if (!strcmp(this_char, "osyncisdsync")) {
                        /* no-op, this is now the default */
 printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
                { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
                { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
                { XFS_MOUNT_OSYNCISOSYNC,       "," MNTOPT_OSYNCISOSYNC },
-                { XFS_MOUNT_BARRIER,            "," MNTOPT_BARRIER },
                { XFS_MOUNT_IDELETE,            "," MNTOPT_NOIKEEP },
                { 0, NULL }
        };
@@ -1914,33 +1924,28 @@ xfs_showargs(
        if (mp->m_logbufs > 0)
                seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
        if (mp->m_logbsize > 0)
                seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
        if (mp->m_logname)
                seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
        if (mp->m_rtname)
                seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
        if (mp->m_dalign > 0)
                seq_printf(m, "," MNTOPT_SUNIT "=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
        if (mp->m_swidth > 0)
                seq_printf(m, "," MNTOPT_SWIDTH "=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-        if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
-                seq_printf(m, "," MNTOPT_ATTR2);
        if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
                seq_printf(m, "," MNTOPT_LARGEIO);
+        if (mp->m_flags & XFS_MOUNT_BARRIER)
+                seq_printf(m, "," MNTOPT_BARRIER);
        if (!(vfsp->vfs_flag & VFS_32BITINODES))
                seq_printf(m, "," MNTOPT_64BITINODE);
        if (vfsp->vfs_flag & VFS_GRPID)
                seq_printf(m, "," MNTOPT_GRPID);
@@ -1959,6 +1964,7 @@ xfs_freeze(
        /* Push the superblock and write an unmount record */
        xfs_log_unmount_write(mp);
        xfs_unmountfs_writesb(mp);
+        xfs_fs_log_dummy(mp);
 }
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e03fa2a3d5ed..eaab355f5a89 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -15,6 +15,9 @@
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
+#include <linux/capability.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -182,8 +185,7 @@ xfs_getattr(
                break;
        }
-        vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
+        vn_atime_to_timespec(vp, &vap->va_atime);
-        vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
        vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
        vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
        vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -336,7 +338,7 @@ xfs_setattr(
                code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
                                         &udqp, &gdqp);
                if (code)
-                        return (code);
+                        return code;
        }
        /*
@@ -541,24 +543,6 @@ xfs_setattr(
                }
                /*
-                 * Can't set extent size unless the file is marked, or
-                 * about to be marked as a realtime file.
-                 *
-                 * This check will be removed when fixed size extents
-                 * with buffered data writes is implemented.
-                 *
-                 */
-                if ((mask & XFS_AT_EXTSIZE)                     &&
-                    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                     vap->va_extsize) &&
-                    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
-                       ((mask & XFS_AT_XFLAGS) &&
-                        (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
-                        code = XFS_ERROR(EINVAL);
-                        goto error_return;
-                }
-                /*
                 * Can't change realtime flag if any extents are allocated.
                 */
                if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -820,13 +804,17 @@ xfs_setattr(
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
                                if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
                                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-                        } else {
+                                if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
+                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                        } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
                                if (vap->va_xflags & XFS_XFLAG_REALTIME) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                } else {
                                        ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
                                }
+                                if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
+                                        di_flags |= XFS_DIFLAG_EXTSIZE;
                        }
                        ip->i_d.di_flags = di_flags;
                }
@@ -996,10 +984,6 @@ xfs_readlink(
                goto error_return;
        }
-        if (!(ioflags & IO_INVIS)) {
-                xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
-        }
        /*
         * See if the symlink is stored inline.
         */
@@ -1043,11 +1027,8 @@ xfs_readlink(
        }
 error_return:
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return error;
 }
@@ -1222,7 +1203,7 @@ xfs_inactive_free_eofblocks(
        last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
        map_len = last_fsb - end_fsb;
        if (map_len <= 0)
-                return (0);
+                return 0;
        nimaps = 1;
        xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1231,12 +1212,13 @@ xfs_inactive_free_eofblocks(
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        if (!error && (nimaps != 0) &&
-            (imap.br_startblock != HOLESTARTBLOCK)) {
+            (imap.br_startblock != HOLESTARTBLOCK ||
+             ip->i_delayed_blks)) {
                /*
                 * Attach the dquots to the inode up front.
                 */
                if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
-                        return (error);
+                        return error;
                /*
                 * There are blocks after the end of file.
@@ -1264,7 +1246,7 @@ xfs_inactive_free_eofblocks(
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
                        xfs_trans_cancel(tp, 0);
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                        return (error);
+                        return error;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1292,7 +1274,7 @@ xfs_inactive_free_eofblocks(
                }
                xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        }
-        return (error);
+        return error;
 }
 /*
@@ -1470,7 +1452,7 @@ xfs_inactive_symlink_local(
        if (error) {
                xfs_trans_cancel(*tpp, 0);
                *tpp = NULL;
-                return (error);
+                return error;
        }
        xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -1483,7 +1465,7 @@ xfs_inactive_symlink_local(
                                  XFS_DATA_FORK);
                ASSERT(ip->i_df.if_bytes == 0);
        }
-        return (0);
+        return 0;
 }
 /*
@@ -1509,7 +1491,7 @@ xfs_inactive_attrs(
        if (error) {
                *tpp = NULL;
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                return (error); /* goto out*/
+                return error; /* goto out */
        }
        tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1522,7 +1504,7 @@ xfs_inactive_attrs(
                xfs_trans_cancel(tp, 0);
                *tpp = NULL;
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                return (error);
+                return error;
        }
        xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1533,7 +1515,7 @@ xfs_inactive_attrs(
        ASSERT(ip->i_d.di_anextents == 0);
        *tpp = tp;
-        return (0);
+        return 0;
 }
 STATIC int
@@ -1566,11 +1548,13 @@ xfs_release(
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+                       ip->i_delayed_blks > 0)) &&
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-                    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
+                    (!(ip->i_d.di_flags &
+                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
-                                return (error);
+                                return error;
                        /* Update linux inode block count after free above */
                        LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
                                ip->i_d.di_nblocks + ip->i_delayed_blks);
@@ -1625,7 +1609,8 @@ xfs_inactive(
         * only one with a reference to the inode.
         */
        truncate = ((ip->i_d.di_nlink == 0) &&
-            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
+            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
+             (ip->i_delayed_blks > 0)) &&
            ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
        mp = ip->i_mount;
@@ -1643,12 +1628,14 @@ xfs_inactive(
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
-                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
+                       ip->i_delayed_blks > 0)) &&
-                    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
+                      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
-                     (ip->i_delayed_blks != 0))) {
+                     (!(ip->i_d.di_flags &
+                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+                      (ip->i_delayed_blks != 0)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
-                                return (VN_INACTIVE_CACHE);
+                                return VN_INACTIVE_CACHE;
                        /* Update linux inode block count after free above */
                        LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
                                ip->i_d.di_nblocks + ip->i_delayed_blks);
@@ -1659,7 +1646,7 @@ xfs_inactive(
        ASSERT(ip->i_d.di_nlink == 0);
        if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
-                return (VN_INACTIVE_CACHE);
+                return VN_INACTIVE_CACHE;
        tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
        if (truncate) {
@@ -1682,7 +1669,7 @@ xfs_inactive(
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
                        xfs_trans_cancel(tp, 0);
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                        return (VN_INACTIVE_CACHE);
+                        return VN_INACTIVE_CACHE;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1703,7 +1690,7 @@ xfs_inactive(
                        xfs_trans_cancel(tp,
                                XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-                        return (VN_INACTIVE_CACHE);
+                        return VN_INACTIVE_CACHE;
                }
        } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
@@ -1717,7 +1704,7 @@ xfs_inactive(
                if (error) {
                        ASSERT(tp == NULL);
-                        return (VN_INACTIVE_CACHE);
+                        return VN_INACTIVE_CACHE;
                }
                xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
@@ -1730,7 +1717,7 @@ xfs_inactive(
                if (error) {
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
                        xfs_trans_cancel(tp, 0);
-                        return (VN_INACTIVE_CACHE);
+                        return VN_INACTIVE_CACHE;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -1752,7 +1739,7 @@ xfs_inactive(
                 * cancelled, and the inode is unlocked. Just get out.
                 */
                 if (error)
-                         return (VN_INACTIVE_CACHE);
+                         return VN_INACTIVE_CACHE;
        } else if (ip->i_afp) {
                xfs_idestroy_fork(ip, XFS_ATTR_FORK);
        }
@@ -2059,8 +2046,8 @@ std_return:
 abort_return:
        cancel_flags |= XFS_TRANS_ABORT;
        /* FALLTHROUGH */
- error_return:
+ error_return:
        if (tp != NULL)
                xfs_trans_cancel(tp, cancel_flags);
@@ -2590,7 +2577,6 @@ xfs_link(
        int                     cancel_flags;
        int                     committed;
        vnode_t                 *target_dir_vp;
-        bhv_desc_t              *src_bdp;
        int                     resblks;
        char                    *target_name = VNAME(dentry);
        int                     target_namelen;
@@ -2603,8 +2589,7 @@ xfs_link(
        if (VN_ISDIR(src_vp))
                return XFS_ERROR(EPERM);
-        src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
+        sip = xfs_vtoi(src_vp);
-        sip = XFS_BHVTOI(src_bdp);
        tdp = XFS_BHVTOI(target_dir_bdp);
        mp = tdp->i_mount;
        if (XFS_FORCED_SHUTDOWN(mp))
@@ -2736,9 +2721,9 @@ std_return:
 abort_return:
        cancel_flags |= XFS_TRANS_ABORT;
        /* FALLTHROUGH */
 error_return:
        xfs_trans_cancel(tp, cancel_flags);
        goto std_return;
 }
 /*
@@ -3211,10 +3196,12 @@ std_return:
        }
        return error;
- error1:
+error1:
        xfs_bmap_cancel(&free_list);
        cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+        /* FALLTHROUGH */
+error_return:
        xfs_trans_cancel(tp, cancel_flags);
        goto std_return;
 }
@@ -3237,7 +3224,6 @@ xfs_readdir(
        xfs_trans_t     *tp = NULL;
        int             error = 0;
        uint            lock_mode;
-        xfs_off_t       start_offset;
        vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
                                               (inst_t *)__return_address);
@@ -3248,11 +3234,7 @@ xfs_readdir(
        }
        lock_mode = xfs_ilock_map_shared(dp);
-        start_offset = uiop->uio_offset;
        error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
-        if (start_offset != uiop->uio_offset) {
-                xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
-        }
        xfs_iunlock_map_shared(dp, lock_mode);
        return error;
 }
@@ -3635,9 +3617,9 @@ xfs_rwlock(
        if (locktype == VRWLOCK_WRITE) {
                xfs_ilock(ip, XFS_IOLOCK_EXCL);
        } else if (locktype == VRWLOCK_TRY_READ) {
-                return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED));
+                return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
        } else if (locktype == VRWLOCK_TRY_WRITE) {
-                return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL));
+                return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
        } else {
                ASSERT((locktype == VRWLOCK_READ) ||
                       (locktype == VRWLOCK_WRITE_DIRECT));
@@ -3829,7 +3811,12 @@ xfs_reclaim(
        vn_iowait(vp);
        ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-        ASSERT(VN_CACHED(vp) == 0);
+        /*
+         * Make sure the atime in the XFS inode is correct before freeing the
+         * Linux inode.
+         */
+        xfs_synchronize_atime(ip);
        /* If we have nothing to flush with this inode then complete the
         * teardown now, otherwise break the link between the xfs inode
@@ -3880,7 +3867,7 @@ xfs_finish_reclaim(
                        xfs_ifunlock(ip);
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                }
-                return(1);
+                return 1;
        }
        ip->i_flags |= XFS_IRECLAIM;
        write_unlock(&ih->ih_lock);
@@ -3999,42 +3986,36 @@ xfs_alloc_file_space(
        int                     alloc_type,
        int                     attr_flags)
 {
+        xfs_mount_t             *mp = ip->i_mount;
+        xfs_off_t               count;
        xfs_filblks_t           allocated_fsb;
        xfs_filblks_t           allocatesize_fsb;
-        int                     committed;
+        xfs_extlen_t            extsz, temp;
-        xfs_off_t               count;
+        xfs_fileoff_t           startoffset_fsb;
-        xfs_filblks_t           datablocks;
-        int                     error;
        xfs_fsblock_t           firstfsb;
-        xfs_bmap_free_t         free_list;
+        int                     nimaps;
-        xfs_bmbt_irec_t         *imapp;
+        int                     bmapi_flag;
-        xfs_bmbt_irec_t         imaps[1];
+        int                     quota_flag;
-        xfs_mount_t             *mp;
-        int                     numrtextents;
-        int                     reccount;
-        uint                    resblks;
        int                     rt;
-        int                     rtextsize;
-        xfs_fileoff_t           startoffset_fsb;
        xfs_trans_t             *tp;
-        int                     xfs_bmapi_flags;
+        xfs_bmbt_irec_t         imaps[1], *imapp;
+        xfs_bmap_free_t         free_list;
+        uint                    qblocks, resblks, resrtextents;
+        int                     committed;
+        int                     error;
        vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
-        mp = ip->i_mount;
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        /*
+        rt = XFS_IS_REALTIME_INODE(ip);
-         * determine if this is a realtime file
+        if (unlikely(rt)) {
-         */
+                if (!(extsz = ip->i_d.di_extsize))
-        if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
+                        extsz = mp->m_sb.sb_rextsize;
-                if (ip->i_d.di_extsize)
+        } else {
-                        rtextsize = ip->i_d.di_extsize;
+                extsz = ip->i_d.di_extsize;
-                else
+        }
-                        rtextsize = mp->m_sb.sb_rextsize;
-        } else
-                rtextsize = 0;
        if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
                return error;
@@ -4045,8 +4026,8 @@ xfs_alloc_file_space(
        count = len;
        error = 0;
        imapp = &imaps[0];
-        reccount = 1;
+        nimaps = 1;
-        xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+        bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -4063,47 +4044,55 @@ xfs_alloc_file_space(
                        offset, end_dmi_offset - offset,
                        0, NULL);
                if (error)
-                        return(error);
+                        return error;
        }
        /*
-         * allocate file space until done or until there is an error
+         * Allocate file space until done or until there is an error
         */
 retry:
        while (allocatesize_fsb && !error) {
+                xfs_fileoff_t   s, e;
                /*
-                 * determine if reserving space on
+                 * Determine space reservations for data/realtime.
-                 * the data or realtime partition.
                 */
-                if (rt) {
+                if (unlikely(extsz)) {
-                        xfs_fileoff_t s, e;
                        s = startoffset_fsb;
-                        do_div(s, rtextsize);
+                        do_div(s, extsz);
-                        s *= rtextsize;
+                        s *= extsz;
-                        e = roundup_64(startoffset_fsb + allocatesize_fsb,
+                        e = startoffset_fsb + allocatesize_fsb;
-                                rtextsize);
+                        if ((temp = do_mod(startoffset_fsb, extsz)))
-                        numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
+                                e += temp;
-                        datablocks = 0;
+                        if ((temp = do_mod(e, extsz)))
+                                e += extsz - temp;
                } else {
-                        datablocks = allocatesize_fsb;
+                        s = 0;
-                        numrtextents = 0;
+                        e = allocatesize_fsb;
+                }
+                if (unlikely(rt)) {
+                        resrtextents = qblocks = (uint)(e - s);
+                        resrtextents /= mp->m_sb.sb_rextsize;
+                        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                        quota_flag = XFS_QMOPT_RES_RTBLKS;
+                } else {
+                        resrtextents = 0;
+                        resblks = qblocks = \
+                                XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+                        quota_flag = XFS_QMOPT_RES_REGBLKS;
                }
                /*
-                 * allocate and setup the transaction
+                 * Allocate and setup the transaction.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-                resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
+                error = xfs_trans_reserve(tp, resblks,
-                error = xfs_trans_reserve(tp,
+                                          XFS_WRITE_LOG_RES(mp), resrtextents,
-                                          resblks,
-                                          XFS_WRITE_LOG_RES(mp),
-                                          numrtextents,
                                          XFS_TRANS_PERM_LOG_RES,
                                          XFS_WRITE_LOG_COUNT);
                /*
-                 * check for running out of space
+                 * Check for running out of space
                 */
                if (error) {
                        /*
@@ -4114,8 +4103,8 @@ retry:
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+                error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-                                ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+                                                      qblocks, 0, quota_flag);
                if (error)
                        goto error1;
@@ -4123,19 +4112,19 @@ retry:
                xfs_trans_ihold(tp, ip);
                /*
-                 * issue the bmapi() call to allocate the blocks
+                 * Issue the xfs_bmapi() call to allocate the blocks
                 */
                XFS_BMAP_INIT(&free_list, &firstfsb);
                error = xfs_bmapi(tp, ip, startoffset_fsb,
-                                  allocatesize_fsb, xfs_bmapi_flags,
+                                  allocatesize_fsb, bmapi_flag,
-                                  &firstfsb, 0, imapp, &reccount,
+                                  &firstfsb, 0, imapp, &nimaps,
                                  &free_list);
                if (error) {
                        goto error0;
                }
                /*
-                 * complete the transaction
+                 * Complete the transaction
                 */
                error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
                if (error) {
@@ -4150,7 +4139,7 @@ retry:
                allocated_fsb = imapp->br_blockcount;
-                if (reccount == 0) {
+                if (nimaps == 0) {
                        error = XFS_ERROR(ENOSPC);
                        break;
                }
@@ -4173,9 +4162,11 @@ dmapi_enospc_check:
        return error;
- error0:
+error0: /* Cancel bmap, unreserve quota blocks, cancel trans, unlock inode */
        xfs_bmap_cancel(&free_list);
- error1:
+        XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+error1: /* Just cancel transaction and unlock inode */
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        goto dmapi_enospc_check;
@@ -4313,7 +4304,7 @@ xfs_free_file_space(
                                offset, end_dmi_offset - offset,
                                AT_DELAY_FLAG(attr_flags), NULL);
                if (error)
-                        return(error);
+                        return error;
        }
        ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
@@ -4420,8 +4411,8 @@ xfs_free_file_space(
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-                                ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
+                                ip->i_udquot, ip->i_gdquot, resblks, 0,
-                                XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+                                XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto error1;